%%% TeX setup written ahead of the bibliography: loads bibnames.sty and
%%% gives \circled, \reg, and \TM fallback definitions when the document
%%% has not already defined them.
@Preamble{
    "\input bibnames.sty"
  # "\ifx \undefined \circled \def \circled #1{(#1)}\fi"
  # "\ifx \undefined \reg \def \reg {\circled{R}}\fi"
  # "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}
%%% Acknowledgement abbreviation: the contact block that the
%%% acknowledgement field of each entry refers to.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                    \path|beebe@acm.org|,
                    \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}
%%% Journal abbreviation used by the journal field of the entries.
@String{j-TODAES = "ACM Transactions on Design Automation of
                    Electronic Systems"}
@Article{Pedram:1996:PMI,
  author =       "Massoud Pedram",
  title =        "Power minimization in {IC} design: principles and
                 applications",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "3--56",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p3-pedram/p3-pedram.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p3-pedram/",
  abstract =     "Low power has emerged as a principal theme in today's
                 electronics industry. The need for low power has caused
                 a major paradigm shift in which power dissipation is as
                 important as performance and area. This article
                 presents an in-depth survey of CAD methodologies and
                 techniques for designing low power digital CMOS
                 circuits and systems and describes the many issues
                 facing designers at architectural, logical, and
                 physical levels of design abstraction. It reviews some
                 of the techniques and tools that have been proposed to
                 overcome these difficulties and outlines the future
                 challenges that must be met to design low power, high
                 performance systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "adiabatic circuits; CMOS circuits; computer-aided
                 design of VLSI; dynamic power dissipation; energy-delay
                 product; gated clocks; layout; low power layout; low
                 power synthesis; lower-power design; power analysis and
                 estimation; power management; power minimization and
                 management; probabilistic analysis;
                 silicon-on-insulator technology; statistical sampling;
                 switched capacitance; switching activity; symbolic
                 simulation; synthesis; system design",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf VLSI (very large scale
                 integration)}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)};
                 Hardware --- Integrated Circuits --- General (B.7.0)",
}
@Article{Cheng:1996:AGF,
  author =       "Kwang-Ting Cheng and A. S. Krishnakumar",
  title =        "Automatic generation of functional vectors using the
                 extended finite state machine model",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "57--79",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p57-cheng/p57-cheng.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p57-cheng/",
  abstract =     "We present a method of automatic generation of
                 functional vectors for sequential circuits. These
                 vectors can be used for design verification,
                 manufacturing testing, or power estimation. A
                 high-level description of the circuit in VHDL or C is
                 assumed available. Our method automatically transforms
                 the high-level description of a circuit in VHDL or C
                 into an extended finite state machine (EFSM) model that
                 is used to generate functional vectors. The EFSM model
                 is a generalization of the traditional state machine
                 model. It is a compact representation of models with
                 local data variables and preserves many nice properties
                 of a traditional state machine model. The theoretical
                 background of the EFSM model is addressed in this
                 article. Our method guarantees that the generated
                 vectors cover every statement in the high-level
                 description at least once. Experimental results show
                 that a set of comprehensive functional vectors for
                 sequential circuits with more than a hundred flip-flops
                 can be generated automatically in a few minutes of CPU
                 time using our prototype system.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Experimentation; Languages; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "automatic test generation; design verification;
                 extended finite state machines; functional testing",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Verification}; Hardware --- Logic Design
                 --- Design Styles (B.6.1): {\bf Sequential circuits};
                 Theory of Computation --- Computation by Abstract
                 Devices --- Models of Computation (F.1.1): {\bf
                 Automata}; Mathematics of Computing --- Discrete
                 Mathematics --- Graph Theory (G.2.2): {\bf Graph
                 algorithms}; Hardware --- Integrated Circuits ---
                 Reliability and Testing** (B.7.3): {\bf Testability**};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Hardware description languages}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2)",
}
@Article{Chang:1996:USM,
  author =       "Yao-Wen Chang and D. F. Wong and C. K. Wong",
  title =        "Universal switch modules for {FPGA} design",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "80--101",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p80-chang/p80-chang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p80-chang/",
  abstract =     "A switch module $M$ with $W$ terminals on each side is
                 said to be {\em universal\/} if every set of nets
                 satisfying the dimensional constraint (i.e., the number
                 of nets on each side of $M$ is at most $W$) is
                 simultaneously routable through $M$. In this article,
                 we present a class of universal switch modules. Each of
                 our switch modules has $ 6 W$ switches and {\em
                 switch-module flexibility\/} three (i.e., $ F_S = 3$).
                 We prove that no switch module with less than $ 6 W$
                 switches can be universal. We also compare our switch
                 modules with those used in the Xilinx XC4000 family
                 FPGAs and the {\em antisymmetric\/} switch modules
                 (with $ F_S = 3$) suggested by Rose and Brown [1991].
                 Although these two kinds of switch modules also have $
                 F_S = 3$ and $ 6 W$ switches, we show that they are not
                 universal. Based on combinatorial counting techniques,
                 we show that each of our universal switch modules can
                 accommodate up to 25\% more routing instances, compared
                 with the XC4000-type switch module of the same size.
                 Experimental results demonstrate that our universal
                 switch modules improve routability at the chip level.
                 Finally, our work also provides a theoretical insight
                 into the important observation by Rose and Brown [1991]
                 (based on extensive experiments) that $ F_S = 3$ is
                 often sufficient to provide high routability.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Placement and routing}",
}
@Article{Thakur:1996:SPF,
  author =       "Shashidhar Thakur and D. F. Wong",
  title =        "Series-parallel functions and {FPGA} logic module
                 design",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "102--122",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p102-thakur/p102-thakur.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p102-thakur/",
  abstract =     "The need for a two-way interaction between logic
                 synthesis and FPGA logic module design has been
                 stressed recently. Having a logic module that can
                 implement many functions is a good idea only if one can
                 also give a synthesis strategy that makes efficient use
                 of this functionality. Traditionally, technology
                 mapping algorithms have been developed after the logic
                 architecture has been designed. We follow a dual
                 approach, by focusing on a specific technology mapping
                 algorithm, namely, the structural tree-based mapping
                 algorithm, and designing a logic module that can be
                 mapped efficiently by this algorithm. It is known that
                 the tree-based mapping algorithm makes optimal use of a
                 library of functions, each of which can be represented
                 by a tree of AND, OR, and NOT gates (series-parallel or
                 SP functions). We show how to design a SP function with
                 a minimum number of inputs that can implement all
                 possible SP functions with a specified number of
                 inputs. For instance, we demonstrate a seven-input SP
                 function that can implement all four-input SP
                 functions. Mapping results show that, on an average,
                 the number of blocks of this function needed to map
                 benchmark circuits are 12\% less than those for Actel's
                 ACT1 logic modules. Our logic modules show a 4\%
                 improvement over ACT1, if the block count is scaled to
                 take into account the number of transistors needed to
                 implement different logic modules.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Performance;
                 Theory; Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "field programmable gate arrays; series-parallel
                 technology mapping; tree-based technology mapping
                 algorithm; universal logic modules",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware --- Logic
                 Design --- Design Styles (B.6.1): {\bf Combinational
                 logic}; Hardware --- Logic Design --- Design Aids
                 (B.6.3); Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)};
                 Mathematics of Computing --- Discrete Mathematics ---
                 Graph Theory (G.2.2): {\bf Trees}",
}
@Article{Thanvantri:1996:OFS,
  author =       "Venkat Thanvantri and Sartaj Sahni",
  title =        "Optimal folding of standard and custom cells",
  journal =      j-TODAES,
  volume =       "1",
  number =       "1",
  pages =        "123--143",
  month =        jan,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-1/p123-thanvantri/p123-thanvantri.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-1/p123-thanvantri/",
  abstract =     "We study the problem of folding an ordered list of
                 standard and custom cells into rows of a chip so as to
                 minimize either the routing area or the total chip
                 area. Nine versions of the folding problem are
                 formulated and fast polynomial time algorithms are
                 obtained for each. Two of our formulations correspond
                 to problems formulated in Paik and Sahni [1993] for the
                 folding of a stack of bit-slice components. Our
                 algorithms for these two formulations are
                 asymptotically superior to those of Paik and Sahni
                 [1993].",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "custom cell folding; layout area; standard cell
                 folding",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2): {\bf
                 Layout}; Theory of Computation --- Analysis of
                 Algorithms and Problem Complexity --- Nonnumerical
                 Algorithms and Problems (F.2.2): {\bf Routing and
                 layout}",
}
@Article{Cong:1996:CLS,
  author =       "Jason Cong and Yuzheng Ding",
  title =        "Combinational logic synthesis for {LUT} based field
                 programmable gate arrays",
  journal =      j-TODAES,
  volume =       "1",
  number =       "2",
  pages =        "145--204",
  month =        apr,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p145-cong/p145-cong.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p145-cong/",
  abstract =     "The increasing popularity of the field programmable
                 gate-array (FPGA) technology has generated a great deal
                 of interest in the algorithmic study and tool
                 development for FPGA-specific design automation
                 problems. The most widely used FPGAs are LUT based
                 FPGAs, in which the basic logic element is a $K$-input
                 one-output lookup-table (LUT) that can implement any
                 Boolean function of up to $K$ variables. This unique
                 feature of the LUT has brought new challenges to logic
                 synthesis and optimization, resulting in many new
                 techniques reported in recent years. This article
                 summarizes the research results on combinational logic
                 synthesis for LUT based FPGAs under a coherent
                 framework. These results were dispersed in various
                 conference proceedings and journals and under various
                 formulations and terminologies. We first present
                 general problem formulations, various optimization
                 objectives and measurements, then focus on a set of
                 commonly used basic concepts and techniques, and
                 finally summarize existing synthesis algorithms and
                 systems. We classify and summarize the basic techniques
                 into two categories, namely, {\em logic optimization\/}
                 and {\em technology mapping}, and describe the existing
                 algorithms and systems in terms of how they use the
                 classified basic techniques. A comprehensive list of
                 references is compiled in the attached bibliography.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Experimentation; Measurement;
                 Performance; Theory",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "area minimization; computer-aided design of VLSI;
                 decomposition; delay minimization; delay modeling;
                 FPGA; logic optimization; power minimization;
                 programmable logic; routing; simplification; synthesis;
                 system design; technology mapping",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Combinational logic}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Automatic synthesis};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Hardware --- Integrated Circuits ---
                 Types and Design Styles (B.7.1): {\bf Gate arrays};
                 Computer Applications --- Computer-Aided Engineering
                 (J.6): {\bf Computer-aided design (CAD)}",
}
@Article{Middelhoek:1996:VEF,
  author =       "Peter F. A. Middelhoek and Sreeranga P. Rajan",
  title =        "From {VHDL} to efficient and first-time-right designs:
                 a formal approach",
  journal =      j-TODAES,
  volume =       "1",
  number =       "2",
  pages =        "205--250",
  month =        apr,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p205-middelhoek/p205-middelhoek.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p205-middelhoek/",
  abstract =     "In this article we provide a practical
                 transformational approach to the synthesis of correct
                 synchronous digital hardware designs from high-level
                 specifications. We do this while taking into account
                 the complete life cycle of a design from early
                 prototype to full custom implementation. Besides
                 time-to-market, both flexibility with respect to target
                 architecture and efficiency issues are addressed by the
                 methodology. The utilization of user-selected
                 behavior-preserving transformation steps ensures
                 first-time-right design while exploiting the
                 experience, flexibility, and creativity of the
                 designer. \par
                 To ensure that design transformations are indeed
                 behavior-preserving a novel mechanized approach to the
                 specification and verification of design
                 transformations on control data flow graphs which is
                 independent of a specific behavioral model or graph
                 size has been developed. \par
                 As a demonstration of an industrial application we use
                 a video processing algorithm needed for the conversion
                 from a line-interlaced to progressively scanned video
                 format. Both a video signal processor-based prototype
                 implementation as well as a very efficient full custom
                 implementation are developed starting from a single
                 high-level behavioral specification of the algorithm in
                 VHDL. Results are compared with those previously
                 obtained using different tools and methodologies.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Human Factors; Languages; Theory;
                 Verification",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "CDFG; correctness by construction; design methodology;
                 rapid system prototyping; SFG; transformational design;
                 VHDL",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design (B.5.1): {\bf Arithmetic and logic units};
                 Hardware --- Register-Transfer-Level Implementation ---
                 Design (B.5.1): {\bf Control design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Data-path design}; Hardware ---
                 Register-Transfer-Level Implementation --- Design
                 (B.5.1): {\bf Styles}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Automatic synthesis}; Hardware ---
                 Register-Transfer-Level Implementation --- Design Aids
                 (B.5.2): {\bf Hardware description languages}; Hardware
                 --- Register-Transfer-Level Implementation --- Design
                 Aids (B.5.2): {\bf Verification}; Hardware --- Logic
                 Design --- Design Aids (B.6.3): {\bf Automatic
                 synthesis}; Hardware --- Logic Design --- Design Aids
                 (B.6.3): {\bf Hardware description languages}; Hardware
                 --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Hardware --- Logic Design --- Design
                 Aids (B.6.3): {\bf Verification}; Software ---
                 Programming Languages --- Language Classifications
                 (D.3.2): {\bf Applicative (functional) languages};
                 Software --- Programming Languages --- Language
                 Classifications (D.3.2): {\bf Data-flow languages};
                 Theory of Computation --- Logics and Meanings of
                 Programs --- Specifying and Verifying and Reasoning
                 about Programs (F.3.1): {\bf Mechanical verification};
                 Theory of Computation --- Mathematical Logic and Formal
                 Languages --- Mathematical Logic (F.4.1): {\bf
                 Mechanical theorem proving}; Computer Applications ---
                 Computer-Aided Engineering (J.6): {\bf Computer-aided
                 design (CAD)}; Hardware --- Register-Transfer-Level
                 Implementation --- Design Aids (B.5.2): {\bf
                 Optimization}; Software --- Software Engineering ---
                 Software/Program Verification (D.2.4): {\bf Correctness
                 proofs}; Hardware --- Logic Design --- Design Aids
                 (B.6.3): {\bf VHDL}",
}
@Article{Kolson:1996:ORA,
  author =       "David J. Kolson and Alexandru Nicolau and Nikil Dutt
                 and Ken Kennedy",
  title =        "Optimal register assignment to loops for embedded code
                 generation",
  journal =      j-TODAES,
  volume =       "1",
  number =       "2",
  pages =        "251--279",
  month =        apr,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p251-kolson/p251-kolson.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p251-kolson/",
  abstract =     "One of the challenging tasks in code generation for
                 embedded systems is register assignment. When more live
                 variables than registers exist, some variables will
                 necessarily be accessed from data memory. Because loops
                 are typically executed many times and are often
                 time-critical, good register assignment in loops is
                 exceedingly important as accessing data memory can
                 degrade performance. The issue of finding an optimal
                 register assignment to loops has been open for some
                 time. In this article, we present a technique for
                 optimal (i.e., spill minimizing) register assignment to
                 loops. First we present a technique for register
                 assignment to architecture styles that are
                 characterized by a consolidated register file. Then we
                 extend the technique to include architecture styles
                 that are characterized by distributed memories and/or a
                 combination of general- and special-purpose registers.
                 Experimental results demonstrate that although the
                 optimal algorithm may be computationally prohibitive,
                 heuristic versions obtain results with performance
                 better than that of an existing graph coloring
                 approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Languages",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code generation; embedded systems; system design",
  subject =      "Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Compilers}; Software --- Programming
                 Languages --- Processors (D.3.4): {\bf Optimization};
                 Software --- Programming Languages --- Processors
                 (D.3.4): {\bf Code generation}",
}
@Article{Prasad:1996:TRP,
  author =       "S. C. Prasad and K. Roy",
  title =        "Transistor reordering for power minimization under
                 delay constraint",
  journal =      j-TODAES,
  volume =       "1",
  number =       "2",
  pages =        "280--300",
  month =        apr,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-2/p280-prasad/p280-prasad.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-2/p280-prasad/",
  abstract =     "In this article we address the problem of optimization
                 of VLSI circuits to minimize power consumption while
                 meeting performance goals. We present a method of
                 estimating power consumption of a basic or complex CMOS
                 gate which takes the internal capacitances of the gate
                 into account. This method is used to select an ordering
                 of series-connected transistors found in CMOS gates to
                 achieve lower power consumption. The method is very
                 efficient when used by library-based design styles. We
                 describe a multipass algorithm that makes use of
                 transistor reordering to optimize performance and power
                 consumption of circuits, has a linear time complexity
                 per pass, and converges to a solution in a small number
                 of passes. Transformations in addition to transistor
                 reordering can be used by the algorithm. The algorithm
                 has been benchmarked on several large examples and the
                 results are presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "circuit optimization; critical path enumeration; gate
                 input reordering; power estimation; transistor
                 reordering",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Optimization}; Hardware --- Integrated Circuits
                 --- Types and Design Styles (B.7.1): {\bf VLSI (very
                 large scale integration)}",
}
@Article{Wolf:1996:OOC,
  author =       "Wayne Wolf",
  title =        "Object-oriented cosynthesis of distributed embedded
                 systems",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "301--314",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p301-wolf/p301-wolf.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p301-wolf/",
  abstract =     "This article describes a new hardware-software
                 cosynthesis algorithm that takes advantage of the
                 structure inherent in an object-oriented specification.
                 The algorithm creates a distributed system
                 implementation with arbitrary topology, using the
                 object-oriented structure to partition functionality in
                 addition to scheduling and allocating processes.
                 Process partitioning is an especially important
                 optimization for such systems because the specification
                 will not, in general, take into account the process
                 structure required for efficient execution on the
                 distributed engine. The object-oriented specification
                 naturally provides both coarse-grained and fine-grained
                 partitions of the system. Our algorithm uses that
                 multilevel structure to guide synthesis. Experimental
                 results show that our algorithm takes advantage of the
                 object-oriented specification to quickly converge on
                 high-quality implementations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "distributed embedded systems; hardware-software
                 co-design; object-oriented co-synthesis",
  subject =      "Computer Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf
                 Microprocessor/microcomputer applications}; Computer
                 Systems Organization --- Special-Purpose and
                 Application-Based Systems (C.3): {\bf Real-time and
                 embedded systems}",
}
@Article{Chow:1996:LPR,
  author =       "Sue-Hong Chow and Yi-Cheng Ho and TingTing Hwang and
                 C. L. Liu",
  title =        "Low power realization of finite state machines --- a
                 decomposition approach",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "315--340",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p315-chow/p315-chow.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p315-chow/",
  abstract =     "We present in this article a new approach to the
                 synthesis problem for finite state machines with the
                 reduction of power dissipation as a design objective. A
                 finite state machine is decomposed into a number of
                 {\em coupled\/} submachines. Most of the time, only one
                 of the submachines will be activated which,
                 consequently, could lead to substantial savings in
                 power consumption. The key steps in our approach are:
                 (1) decomposition of a finite state machine into
                 submachines so that there is a high probability that
                 state transitions will be confined to the smaller of
                 the submachines most of the time, and (2) synthesis of
                 the coupled submachines to optimize the logic circuits.
                 Experimental results confirmed that our approach
                 produced very good results (in particular, for finite
                 state machines with a large number of states).",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "decomposition of finite state machines; lower power
                 design; state assignment",
  subject =      "Hardware --- Logic Design --- Design Styles (B.6.1):
                 {\bf Sequential circuits}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Automatic synthesis};
                 Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
                 Optimization}; Computer Applications --- Computer-Aided
                 Engineering (J.6): {\bf Computer-aided design (CAD)}",
}
@Article{Kagaris:1996:FAM,
  author =       "Dimitrios Kagaris and Spyros Tragoudas",
  title =        "A fast algorithm for minimizing {FPGA} combinational
                 and sequential modules",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "341--351",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p341-kagaris/p341-kagaris.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p341-kagaris/",
  abstract =     "We present a quadratic-time algorithm for minimizing
                 the number of modules in an FPGA with combinational and
                 sequential modules (like the C-modules and S-modules of
                 the ACT2 and ACT3 architectures). The constraint is
                 that a combinational module can be combined with one
                 flip-flop in a single sequential module, only if the
                 combinational module drives no other combinational
                 modules. Our algorithm uses a minimum-cost flow
                 formulation to solve the problem with a significant
                 time improvement over a previous approach that used a
                 general linear program.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Algorithms; Design",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "field programmable gate arrays; retiming",
  subject =      "Hardware --- Logic Design --- Design Aids (B.6.3):
                 {\bf Automatic synthesis}; Hardware --- Logic Design
                 --- Design Aids (B.6.3): {\bf Optimization}; Hardware
                 --- Integrated Circuits --- Types and Design Styles
                 (B.7.1): {\bf Gate arrays}",
}
@Article{Chang:1996:OCP,
  author =       "En-Shou Chang and Daniel D. Gajski and Sanjiv
                 Narayan",
  title =        "An optimal clock period selection method based on
                 slack minimization criteria",
  journal =      j-TODAES,
  volume =       "1",
  number =       "3",
  pages =        "352--370",
  month =        jul,
  year =         "1996",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p352-chang/p352-chang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p352-chang/",
  abstract =     "An important decision in synthesizing a hardware
                 implementation from a behavioral description is
                 selecting the clock period to schedule the datapath
                 operations into control steps. Prior to scheduling,
                 most existing behavioral synthesis systems either
                 require the designer to specify the clock period
                 explicitly or require that the delays of the operators
                 used in the design be specified in multiples of the
                 clock period. An unfavorable choice of clock period
                 could result in operations being idle for a large
                 portion of the clock period and, consequently, affect
                 the performance of the synthesized design. In this
                 article, we demonstrate the effect of clock slack on
                 the performance of designs and present an algorithm to
                 find a slack-minimal clock period. We prove the
                 optimality of our method and apply it to several
                 examples to demonstrate its effectiveness in maximizing
                 design performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Measurement; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "clock period; clock slack; performance estimation;
                 scheduling",
  subject =      "Hardware --- Register-Transfer-Level Implementation
                 --- Design Aids (B.5.2)",
}
@Article{Lopez:1996:EDP,
author = "Mario A. Lopez and Dinesh P. Mehta",
title = "Efficient decomposition of polygons into {L-shapes}
with application to {VLSI} layouts",
journal = j-TODAES,
volume = "1",
number = "3",
pages = "371--395",
month = jul,
year = "1996",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p371-lopez/p371-lopez.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p371-lopez/",
abstract = "We present two practical algorithms for partitioning
circuit components represented by rectilinear polygons
so that they can be stored using the L-shaped corner
stitching data structure; that is, our algorithms
decompose a simple polygon into a set of nonoverlapping
L-shapes and rectangles by using horizontal cuts only.
The more general of our algorithms computes an optimal
configuration for a wide variety of optimization
functions, whereas the other computes a minimum
configuration of rectangles and L-shapes. Both
algorithms run in $ O(n + h \log h) $ time, where $n$
is the number of vertices in the polygon and $h$ is the
number of H-pairs. Because for VLSI data $h$ is small,
in practice these algorithms are linear in $n$.
Experimental results on actual VLSI data compare our
algorithms and demonstrate the gains in performance for
corner stitching (as measured by different objective
functions) obtained by using them instead of more
traditional rectangular partitioning algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "corner stitching; L-shapes; partition; rectangle;
rectilinear polygons",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Layout}; Theory of Computation ---
Analysis of Algorithms and Problem Complexity ---
Nonnumerical Algorithms and Problems (F.2.2): {\bf
Geometrical problems and computations}; Mathematics of
Computing --- Discrete Mathematics --- Graph Theory
(G.2.2): {\bf Graph algorithms}",
}
@Article{Moreno:1996:REU,
author = "R. Moreno and R. Hermida and M. Fern{\'a}ndez",
title = "Register estimation in unscheduled dataflow graphs",
journal = j-TODAES,
volume = "1",
number = "3",
pages = "396--403",
month = jul,
year = "1996",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-3/p396-moreno/p396-moreno.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1996-1-3/p396-moreno/",
abstract = "A method for register number estimation in unscheduled
or partially scheduled dataflow graphs is presented.
The strategy consists of studying the probability that
an edge between two nodes crosses the boundary between
two control steps, and it is based on a model that
associates probabilities with the different scheduling
alternatives of each node. These probabilities are
computed by means of an analytic method that takes into
account the distribution of operations in the dataflow
graph and the hardware modules available in the
library. The results highlight that the estimation
method is very accurate because the error between the
estimated value and the real value is always within a
narrow margin.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "area estimation; high-level synthesis; probabilities;
register estimation; scheduling",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design (B.5.1): {\bf Data-path design}",
}
@Article{Cheng:1996:GLT,
author = "Kwang-Ting Cheng",
title = "Gate-level test generation for sequential circuits",
journal = j-TODAES,
volume = "1",
number = "4",
pages = "405--442",
month = oct,
year = "1996",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p405-cheng/p405-cheng.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p405-cheng/",
abstract = "This paper discusses the gate-level automatic test
pattern generation (ATPG) methods and techniques for
sequential circuits. The basic concepts, examples,
advantages, and limitations of representative methods
are reviewed in detail. The relationship between
gate-level sequential circuit ATPG and the partial scan
design is also discussed.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Reliability; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "automatic test generation; IC testing; sequential
circuit test generation; testing",
subject = "Hardware --- Integrated Circuits --- Reliability and
Testing** (B.7.3); Hardware --- Integrated Circuits ---
Types and Design Styles (B.7.1)",
}
@Article{Langevin:1996:RTC,
author = "M. Langevin and E. Cerny",
title = "A recursive technique for computing lower-bound
performance of schedules",
journal = j-TODAES,
volume = "1",
number = "4",
pages = "443--455",
month = oct,
year = "1996",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p443-langevin/p443-langevin.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p443-langevin/",
abstract = "We present a fast recursive technique for estimating
lower-bound performance of data path schedules. The
method relies on the determination of an ASAPUC (As
Soon As Possible Under Constraint) time-step value for
each node of the DFG (Data-Flow Graph) that is based on
the ASAPUC values of its predecessor nodes. That is,
the lower-bound estimation is applied to each subgraph
permitting the derivation of a tight lower bound on the
performance of the complete DFG. Applying the greedy
lower-bound estimator of Rim and Jain [1994] to each
subgraph improves the complete lower bound in more than
50\% of the experiments reported in Rim and Jain
[1994], and the CPU time is only about twice as long.
The recursive methodology can be extended to exploit
other lower-bound techniques, for example, considering
other constraints such as the number of busses or
registers.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "dataflow graph; lower-bound on performance; microcode
optimization; resource constraints; scheduling",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2): {\bf Optimization}; Hardware
--- Register-Transfer-Level Implementation --- Design
Aids (B.5.2): {\bf Automatic synthesis}",
}
@Article{Sosic:1996:UAF,
author = "Rok Sosi{\v{c}} and Jun Gu and Robert R. Johnson",
title = "The {Unison} algorithm: fast evaluation of {Boolean}
expressions",
journal = j-TODAES,
volume = "1",
number = "4",
pages = "456--477",
month = oct,
year = "1996",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Oct 22 15:33:01 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p456-sosic/p456-sosic.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p456-sosic/",
abstract = "We present a Unison algorithm to evaluate arbitrarily
complex Boolean expressions. This novel algorithm,
based on the total differential of a Boolean function,
enables fast evaluation of Boolean expressions in
software. Any combination of Boolean operations can be
packed into the bits of one computer word and evaluated
in parallel by bitwise logical operations. Sample runs
of the Unison algorithm show that many Boolean
operations can be evaluated in one clock cycle. The Unison
algorithm is able to evaluate Boolean expressions at an
execution speed that is comparable to compiled
evaluation while retaining the flexibility of
interpreted approaches. The algorithm lends itself well
to many practical applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Performance; Reliability;
Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Boolean differential; Boolean evaluation; Boolean
expressions; Unison algorithm",
subject = "Hardware --- Logic Design --- General (B.6.0); Theory
of Computation --- Analysis of Algorithms and Problem
Complexity --- Nonnumerical Algorithms and Problems
(F.2.2)",
}
@Article{Cong:1996:OWI,
author = "Jason Cong and Lei He",
title = "Optimal wiresizing for interconnects with multiple
sources",
journal = j-TODAES,
volume = "1",
number = "4",
pages = "478--511",
month = oct,
year = "1996",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p478-cong/p478-cong.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p478-cong/",
abstract = "In this paper, we study the optimal wiresizing problem
for nets with multiple sources under the RC tree model
and the Elmore delay model. We decompose the routing
tree for a multisource net into the source subtree
(SST) and a set of loading subtrees (LSTs), and show
that the optimal wiresizing solution satisfies a number
of interesting properties, including: LST separability,
the LST monotone property, the SST local monotone
property, and the dominance property. Furthermore, we
study the optimal wiresizing problem using a variable
segment-division rather than an a priori fixed
segment-division as in all previous works and reveal
the bundled refinement property. These properties lead
to efficient algorithms to compute the optimal
solutions. We have tested our algorithm on nets
extracted from the multilayer layout for a
high-performance Intel microprocessor. Accurate SPICE
simulation shows that our methods reduce the average
delay by up to 23.5\% and the maximum delay by up to
37.8\%, respectively, for the submicron CMOS technology
when compared to the minimal wire width solution. In
addition, the algorithm based on the variable
segment-division yields a speedup of over 100$ \times $
time and does not lose any accuracy, when compared with
the algorithm based on the a priori fixed
segment-division.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "bundled refinement; decomposition of multi-source
routing tree; dominance property; Elmore delay;
fidelity; high performance; interconnect optimization;
layout optimization; local refinement; multi-source
net; multi-source routing tree; optimal wiresizing;
variable segment-division",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Placement and routing}; Hardware ---
Integrated Circuits --- Design Aids (B.7.2): {\bf
Simulation}; Computer Applications --- Computer-Aided
Engineering (J.6): {\bf Computer-aided design (CAD)};
Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Layout}; Mathematics of Computing ---
Discrete Mathematics --- Graph Theory (G.2.2); Hardware
--- Integrated Circuits --- Types and Design Styles
(B.7.1); Hardware --- Integrated Circuits --- Design
Aids (B.7.2): {\bf SPICE}; Hardware --- Input/Output
and Data Communications --- Interconnections
(Subsystems) (B.4.3)",
}
@Article{Ganley:1996:RST,
author = "Joseph L. Ganley and James P. Cohoon",
title = "Rectilinear {Steiner} trees on a checkerboard",
journal = j-TODAES,
volume = "1",
number = "4",
pages = "512--522",
month = oct,
year = "1996",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1996-1-4/p512-ganley/p512-ganley.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1996-1-4/p512-ganley/",
abstract = "The rectilinear Steiner tree problem is to find a
minimum-length set of horizontal and vertical line
segments that interconnect a given set of points in the
plane. Here we study the {\em thumbnail rectilinear
Steiner tree\/} problem, where the input points are
drawn from a small integer grid. Specifically, we
devise a full-set decomposition algorithm for
computing optimal thumbnail rectilinear Steiner trees.
We then present experimental results comparing the
performance of this algorithm with two existing
algorithms for computing optimal rectilinear Steiner
trees. The thumbnail rectilinear Steiner tree problem
has applications in VLSI placement algorithms, based on
geometric partitioning, global routing of
field-programmable gate arrays, and routing estimation
during floorplanning.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "exact algorithms; full-set decomposition; rectilinear
Steiner tree; routing",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Placement and routing}; Theory of
Computation --- Analysis of Algorithms and Problem
Complexity --- Nonnumerical Algorithms and Problems
(F.2.2): {\bf Geometrical problems and computations};
Mathematics of Computing --- Discrete Mathematics ---
Graph Theory (G.2.2): {\bf Graph algorithms};
Mathematics of Computing --- Discrete Mathematics ---
Graph Theory (G.2.2): {\bf Trees}",
}
@Article{Lin:1997:RDH,
author = "Youn-Long Lin",
title = "Recent developments in high-level synthesis",
journal = j-TODAES,
volume = "2",
number = "1",
pages = "2--21",
month = jan,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p2-lin/p2-lin.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p2-lin/",
abstract = "We survey recent developments in high level synthesis
technology for VLSI design. The need for higher-level
design automation tools is discussed first. We then
describe some basic techniques for various subtasks of
high-level synthesis. Techniques that have been
proposed in the past few years (since 1994) for various
subtasks of high-level synthesis are surveyed. We also
survey some new synthesis objectives including
testability, power efficiency, and reliability.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation; Languages; Reliability",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design automation; design methodology; high level
synthesis; VLSI design",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design (B.5.1): {\bf Data-path design}; Hardware
--- Register-Transfer-Level Implementation --- Design
Aids (B.5.2): {\bf Automatic synthesis}; Hardware ---
Register-Transfer-Level Implementation --- Design Aids
(B.5.2): {\bf Hardware description languages}; Hardware
--- Register-Transfer-Level Implementation --- Design
Aids (B.5.2): {\bf Optimization}",
}
@Article{Gong:1997:MRH,
author = "Jie Gong and Daniel D. Gajski and Smita Bakshi",
title = "Model refinement for hardware-software codesign",
journal = j-TODAES,
volume = "2",
number = "1",
pages = "22--41",
month = jan,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p22-gong/p22-gong.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p22-gong/",
abstract = "Hardware-software codesign, which implements a given
specification with a set of system components such as
ASICs and processors, includes several key tasks such
as system component allocation, functional
partitioning, quality metrics estimation, and model
refinement. In this work, we focus on the model
refinement task which transforms a specification from
an original functional model to a refined
implementation model. First, we categorize several
commonly used implementation models and describe a set
of refinement procedures to transform a specification
to each of these implementation models. We also present
a set of experimental results to compare the
implementation models and to demonstrate how the
proposed approach can be used to explore different
implementation styles.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation; Languages;
Measurement",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "functional model; implementation model; model
refinement; software-hardware codesign",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6): {\bf Computer-aided design (CAD)}; Computer
Systems Organization --- General (C.0): {\bf
Hardware/software interfaces}; Hardware ---
Register-Transfer-Level Implementation --- General
(B.5.0); Computer Systems Organization --- General
(C.0): {\bf Modeling of computer architecture}",
}
@Article{deAbreuMoreira:1997:ADC,
author = "Dilvan {de Abreu Moreira} and Les T. Walczowski",
title = "{AGENTS}: a distributed client-server system for leaf
cell generation",
journal = j-TODAES,
volume = "2",
number = "1",
pages = "42--61",
month = jan,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Oct 31 06:28:35 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p42-moreira/p42-moreira.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p42-moreira/",
abstract = "The AGENTS system is a set of programs designed to
generate automatically the mask-level layout of full
custom CMOS, BICMOS, and bipolar leaf cells. The system
is formed from four server programs: the placer, router,
database, and broker. \par
The placer places components in a cell, the router
wires the circuits sent to it, the database stores all
the information that is dependent upon the fabrication
process, such as the design rules, and the Broker makes
the services of the other servers available. \par
These servers communicate over a computer network using
the TCP/IP Internet Protocol. The Placer server
receives from its client the description and netlist of
the circuit to be generated using EDIF (Electronic
Design Interchange Format). The output to its client is
the mask layout of the circuit, again codified in EDIF.
The concept of agents as software components which have
the ability to communicate and cooperate with each
other is at the heart of the AGENTS system. This
concept is not only used at the higher level, for the
four servers, but at a lower level as well, inside the
Router and Placer servers, where small relatively
simple agents work together to accomplish complex
tasks. These small agents are responsible for all the
reasoning carried out by the two servers, as they hold
the basic inference routines and the knowledge needed
by the servers. The system's philosophy is that
competence should emerge out of the collective behavior
of a large number of relatively simple agents. In
addition and integrated to these small agents, the
system uses a genetic algorithm to improve components'
placement before routing.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "client/server model; genetic algorithms; software
agents",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Placement and routing}; Hardware ---
Integrated Circuits --- Types and Design Styles
(B.7.1)",
}
@Article{Esbensen:1997:PDI,
author = "Henrik Esbensen and Ernest S. Kuh",
title = "A performance-driven {IC\slash MCM} placement
algorithm featuring explicit design space exploration",
journal = j-TODAES,
volume = "2",
number = "1",
pages = "62--80",
month = jan,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-1/p62-esbensen/p62-esbensen.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-1/p62-esbensen/",
abstract = "A genetic algorithm for building-block placement of
ICs and MCMs is presented that simultaneously minimizes
layout area and an Elmore-based estimate of the maximum
path delay while trying to meet a target aspect ratio.
Explicit design space exploration is performed by using
a vector-valued, 3-dimensional cost function and
searching for a set of distinct solutions representing
the best trade-offs of the cost dimensions. From the
output solutions, the designer can choose the solution
with the preferred trade-off. In contrast to existing
approaches, the required properties of the output
solutions are specified without using weights or
bounds. Consequently, the practical problems of
specifying these quantities are eliminated. Promising
experimental results are obtained for various placement
problems, including a real-world design. Solution sets
representing good, balanced cost trade-offs are found
using a reasonable amount of runtime. Furthermore, the
performance is shown to be comparable to that of
simulated annealing in the special case of
1-dimensional optimization, in which direct comparison
is possible.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design space exploration; timing-driven building-block
placement",
subject = "Hardware --- Integrated Circuits --- Types and Design
Styles (B.7.1): {\bf VLSI (very large scale
integration)}; Hardware --- Integrated Circuits ---
Design Aids (B.7.2): {\bf Placement and routing};
Computing Methodologies --- Artificial Intelligence ---
Problem Solving, Control Methods, and Search (I.2.8):
{\bf Heuristic methods}",
}
@Article{Lin:1997:STV,
author = "Yann-Rue Lin and Cheng-Tsung Hwang and Allen C.-H.
Wu",
title = "Scheduling techniques for variable voltage low power
designs",
journal = j-TODAES,
volume = "2",
number = "2",
pages = "81--97",
month = apr,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p81-lin/p81-lin.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p81-lin/",
abstract = "This paper presents an integer linear programming
(ILP) model and a heuristic for the variable voltage
scheduling problem. We present the variable voltage
scheduling techniques that consider in turn timing
constraints alone, resource constraints alone, and
timing and resource constraints together for design
space exploration. Experimental results show that our
heuristic produces results competitive with those of
the ILP method in a fraction of the run-time. The
results also show that a wide range of design
alternatives can be generated using our design space
exploration method. Using different cost/delay
combinations, power consumption in a single design can
differ by as much as a factor of 6 when using mixed
3.3V and 5V supply voltages.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "high-level synthesis; low power design; scheduling;
variable voltage",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2); Hardware ---
Register-Transfer-Level Implementation --- Design
(B.5.1): {\bf Styles}; Hardware --- Integrated Circuits
--- Types and Design Styles (B.7.1): {\bf VLSI (very
large scale integration)}",
}
@Article{Fummi:1997:FDT,
author = "F. Fummi and U. Rovati and D. Sciuto",
title = "Functional design for testability of control-dominated
architectures",
journal = j-TODAES,
volume = "2",
number = "2",
pages = "98--122",
month = apr,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p98-fummi/p98-fummi.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p98-fummi/",
abstract = "Control-dominated architectures are usually described
in a hardware description language (HDL) by means of
interacting FSMs. A VHDL or Verilog specification can
be translated into an interacting FSM (IFSM)
representation as described here. The IFSM model allows
us to approach the testable synthesis problem at the
level of each FSM. The functionality is modified by the
addition of transparency to data flow. The complete
testability of the IFSM implementation is thus achieved
by connecting fully testable implementations of each
modified FSM. In this way, test sequences separately
generated for each FSM are directly applied to the IFSM
to achieve complete fault coverage. The addition of
test functionality to each FSM description, and its
simultaneous synthesis with the FSM functionality,
produces a lower area overhead than that necessary for
the application of a partial-scan technique. Moreover,
the test generation problem is highly simplified since
it is reduced to the test generation for each separate
FSM.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Measurement; Performance; Reliability",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "functional testing; interacting FSMs",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Reliability and Testing** (B.5.3): {\bf Test
generation**}; Hardware --- Register-Transfer-Level
Implementation --- Reliability and Testing** (B.5.3):
{\bf Testability**}; Hardware --- Logic Design ---
Design Aids (B.6.3): {\bf Hardware description
languages}",
}
@Article{Kormicki:1997:PLS,
author = "Maciek Kormicki and Ausif Mahmood and Bradley S.
Carlson",
title = "Parallel logic simulation on a network of workstations
using parallel virtual machine",
journal = j-TODAES,
volume = "2",
number = "2",
pages = "123--134",
month = apr,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p123-kormicki/p123-kormicki.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p123-kormicki/",
abstract = "This paper explores parallel logic simulation on a
network of workstations using a parallel virtual
machine (PVM). A novel parallel implementation of the
centralized-time event-driven logic simulation
algorithm is carried out such that no global
controlling workstation is needed to synchronize the
advance of simulation time. Further advantages of our
new approach include a random partitioning of the
circuit onto available workstations and a pipelined
execution of the different phases of the simulation
algorithm. To achieve a better load balance, we employ
a semioptimistic scheme for gate evaluations (in
conjunction with a centralized-time algorithm) such
that no rollback is required. The performance of this
implementation has been evaluated using the ISCAS
benchmark circuits. Speedups improve with the size of
the circuit and the activity level in the circuit.
Analyses of the communication overhead show that the
techniques developed here will yield even higher gains
as newer networking technologies like ATM are employed
to connect workstations.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Performance; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "distributed computing; parallel logic simulation; PVM;
synchronous simulation",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3):
{\bf Simulation}; Hardware --- Integrated Circuits ---
Design Aids (B.7.2): {\bf Simulation}",
}
@Article{Yang:1997:HFM,
author = "Cheng-Hsing Yang and Chia-Chun Tsai and Jan-Ming Ho
and Sao-Jie Chen",
title = "Hmap: a fast mapper for {EPGAs} using extended {GBDD}
hash tables",
journal = j-TODAES,
volume = "2",
number = "2",
pages = "135--150",
month = apr,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p135-yang/p135-yang.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p135-yang/",
abstract = "A fast and efficient algorithm for technology mapping
of electrically programmable gate arrays (EPGAs) is
proposed. This Hmap algorithm covers the Boolean
network with programmed logic modules bottom-up. The
covering operation is based on collapsing the fanins of
a node to form a bigger supernode such that fewer
clusters are needed to be detected. Then Boolean
matching is used to detect whether the collapsed
supernode can be mapped into a logic module by looking
up an extended GBDD hash table. The use of this table
look-up matching can shorten the matching time
significantly. As shown in the experiments, the average
running time of Hmap is 20 times faster than that of
MIS-pga2.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2); Hardware --- Integrated Circuits --- Types and
Design Styles (B.7.1): {\bf Gate arrays}",
}
@Article{Mak:1997:BLM,
author = "Wai-Kei Mak and D. F. Wong",
title = "Board-level multiterminal net routing for {FPGA-based}
logic emulation",
journal = j-TODAES,
volume = "2",
number = "2",
pages = "151--167",
month = apr,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p151-mak/p151-mak.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p151-mak/",
abstract = "We consider a board-level routing problem applicable
to FPGA-based logic emulation systems such as the
Realizer System [Varghese et al. 1993] and the
Enterprise Emulation System [Maliniak 1992]
manufactured by Quickturn Design Systems. Optimal
algorithms have been proposed for the case where all
nets are two-terminal nets [Chan and Schlag 1993; Mak
and Wong 1995]. We show how multiterminal nets can be
handled by decomposition into two-terminal nets. We
show that the multiterminal net decomposition problem
can be modeled as a bounded-degree hypergraph-to-graph
transformation problem where hyperedges are transformed
to spanning trees. A network flow-based algorithm that
solves both problems is proposed. It determines if
there is a feasible decomposition and gives one
whenever such a decomposition exists.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "board-level routing; crossbars; field programmable
gate arrays; logic emulation; multi-terminal net
decomposition",
subject = "Hardware --- Integrated Circuits --- Types and Design
Styles (B.7.1): {\bf Gate arrays}; Hardware ---
Integrated Circuits --- Design Aids (B.7.2): {\bf
Placement and routing}; Hardware --- Integrated
Circuits --- Design Aids (B.7.2): {\bf Verification}",
}
@Article{Kahng:1997:ARI,
author = "Andrew B. Kahng and Sudhakar Muddu",
title = "Analysis of {RC} interconnections under ramp input",
journal = j-TODAES,
volume = "2",
number = "2",
pages = "168--192",
month = apr,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p168-kahng/p168-kahng.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p168-kahng/",
abstract = "We give new methods for calculating the time-domain
response for a finite-length distributed {\em RC\/}
line that is stimulated by a ramp input. The following
are our contributions. First, we obtain the solution of
the diffusion equation for a semi-infinite distributed
{\em RC\/} line with ramp input. We then present a
general and, in the limit, {\em exact\/} approach to
compute the time-domain response for finite-length {\em
RC\/} lines under ramp input by summing distinct
diffusions starting at either end of the line. Next, we
obtain analytical expressions for the finite
time-domain voltage response for an open-ended finite
{\em RC\/} line and for a finite {\em RC\/} line with
capacitive load. The delay estimates using this method
are very close to SPICE-computed delays. Finally, we
present a general recursive equation for computing the
higher-order diffusion components due to reflections at
the source and load ends. Future work extends our
method to response computations in general
interconnection trees by modeling both reflection and
transmission coefficients at discontinuities.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Performance; Theory;
Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "diffusion equation analysis; ramp input response; VLSI
interconnects",
subject = "Hardware --- Integrated Circuits --- Types and Design
Styles (B.7.1): {\bf VLSI (very large scale
integration)}; Hardware --- Integrated Circuits ---
Design Aids (B.7.2): {\bf Layout}",
}
@Article{Benini:1997:SBM,
author = "Luca Benini and Giovanni {De Micheli}",
title = "A survey of {Boolean} matching techniques for library
binding",
journal = j-TODAES,
volume = "2",
number = "3",
pages = "193--226",
month = jul,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p193-benini/p193-benini.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p193-benini/",
abstract = "When binding a logic network to a set of cells, a
fundamental problem is recognizing whether a cell can
implement a portion of the network. Boolean matching
means solving this task using a formalism based on
Boolean algebra. In its simplest form, Boolean matching
can be posed as a tautology check. We review several
approaches to Boolean matching as well as to its
generalization to cases involving {\em don't care\/}
conditions and its restriction to specific libraries
such as those typical of anti-fuse based FPGAs. We then
present a general formulation of Boolean matching
supporting multiple-output logic cells.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Measurement; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Logic Design --- Design Styles (B.6.1)",
}
@Article{Johnson:1997:DSM,
author = "Mark C. Johnson and Kaushik Roy",
title = "Datapath scheduling with multiple supply voltages and
level converters",
journal = j-TODAES,
volume = "2",
number = "3",
pages = "227--248",
month = jul,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p227-johnson/p227-johnson.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p227-johnson/",
abstract = "We present an algorithm called MOVER (Multiple
Operating Voltage Energy Reduction) to minimize
datapath energy dissipation through use of multiple
supply voltages. In a single voltage design, the
critical path length, clock period, and number of
control steps limit minimization of voltage and power.
Multiple supply voltages permit localized voltage
reductions to take up remaining schedule slack. MOVER
initially finds one minimum voltage for an entire
datapath. It then determines a second voltage for
operations where there is still schedule slack. New
voltages can be introduced and minimized until no
schedule slack remains. MOVER was exercised for a
variety of DSP datapath examples. Energy savings ranged
from 0\% to 50\% when comparing dual to single voltage
results. The benefit of going from two to three
voltages never exceeded 15\%. Power supply costs are
not reflected in these savings, but a simple analysis
shows that energy savings can be achieved even with
relatively inefficient DC-DC converters. Datapath
resource requirements were found to vary greatly with
respect to number of supplies. Area penalties ranged
from 0\% to 170\%. Implications of multiple voltage
design for IC layout and power supply requirements are
discussed.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "datapath scheduling; DSP; high-level synthesis; level
conversion; low power design; multiple voltage; power
optimization; scheduling",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design (B.5.1): {\bf Data-path design}; Hardware
--- Register-Transfer-Level Implementation --- Design
Aids (B.5.2): {\bf Optimization}; Mathematics of
Computing --- Numerical Analysis --- Optimization
(G.1.6): {\bf Integer programming}",
}
@Article{Yalcin:1997:EPC,
author = "Hakan Yalcin and John P. Hayes",
title = "Event propagation conditions in circuit delay
computation",
journal = j-TODAES,
volume = "2",
number = "3",
pages = "249--280",
month = jul,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p249-yalcin/p249-yalcin.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p249-yalcin/",
abstract = "Accurate and efficient computation of delays is a
central problem in computer-aided design of complex
VLSI circuits. Delays are determined by events (signal
transitions) propagated from the inputs of a circuit to
its outputs, so precise characterization of event
propagation is required for accurate delay computation.
Although many different propagation conditions (PCs)
have been proposed for delay computation, their
properties and relationships have been far from clear.
We present a systematic analysis of delay computation
based on a series of waveform models that capture
signal behavior rigorously at different levels of
details. The most general model, called the exact or W0
model, specifies each event occurring in a circuit
signal. A novel method is presented that generates
approximate waveforms by progressively eliminating
signal values from the exact model. For each waveform
model, we derive the PCs that correctly capture the
requirements under which an event propagates along a
path. The waveform models and their PCs are shown to
form a well-defined hierarchy, which provides a means
to trade accuracy for computational effort. The
relationships among the derived PCs and existing ones
are analyzed in depth. It is proven that though many
PCs, such as the popular floating mode condition,
produce a correct upper bound on the circuit delay,
they can fail to recognize event propagation in some
instances. This analysis further enables us to derive
new and useful PCs. We describe such a PC, called safe
static. Experimental results demonstrate that safe
static provides an excellent accuracy/efficiency
tradeoff.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Performance; Theory; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "delay computation; event propagation; false path; path
sensitization; propagation condition; timing analysis;
waveform modeling",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Verification}; Hardware --- Logic Design
--- Design Aids (B.6.3): {\bf Verification}",
}
@Article{Thadikaran:1997:ACB,
author = "Paul Thadikaran and Sreejit Chakravarty and Janak
Patel",
title = "Algorithms to compute bridging fault coverage of
{IDDQ} test sets",
journal = j-TODAES,
volume = "2",
number = "3",
pages = "281--305",
month = jul,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-3/p281-thadikaran/p281-thadikaran.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-3/p281-thadikaran/",
abstract = "We present two algorithms, called list-based scheme
and tree-based scheme, to compute bridging fault (BF)
coverage of $ I_{DDQ} $ tests. These algorithms use
the novel idea of ``indistinguishable pairs,'' which
makes it more efficient and versatile than known fault
simulation algorithms. Unlike known algorithms, the two
algorithms can be used for combinational as well as
sequential circuits and for arbitrary sets of BFs.
Experiments show that the tree-based scheme is, in
general, better than the list-based scheme. But the
list-based scheme is better for some classes of
faults.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3):
{\bf Simulation}",
}
@Article{Xu:1997:LDR,
author = "Min Xu and Fadi J. Kurdahi",
title = "Layout-driven {RTL} binding techniques for high-level
synthesis using accurate estimators",
journal = j-TODAES,
volume = "2",
number = "4",
pages = "312--343",
month = oct,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p312-xu/p312-xu.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p312-xu/",
abstract = "The importance of effective and efficient accounting
of layout effects is well established in High-Level
Synthesis (HLS), since it allows more realistic
exploration of the design space and the generation of
solutions with predictable metrics. This feature is
highly desirable in order to avoid unnecessary
iterations through the design process. In this article,
we address the problem of layout-driven
register-transfer-level (RTL) binding as this step has
a direct relevance to the final performance of the
design. By producing not only an RTL design but also an
approximate physical topology of the chip-level
implementation, we ensure that the solution will
perform at the predicted metric once implemented, thus
avoiding unnecessary delays in the design process.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Measurement; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design (B.5.1); Hardware --- Integrated Circuits
--- Types and Design Styles (B.7.1): {\bf Gate arrays};
Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Layout}",
}
@Article{Munch:1997:EIB,
author = "Michael M{\"u}nch and Norbert Wehn and Manfred
Glesner",
title = "An efficient {ILP-based} scheduling algorithm for
control-dominated {VHDL} descriptions",
journal = j-TODAES,
volume = "2",
number = "4",
pages = "344--364",
month = oct,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p344-munch/p344-munch.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p344-munch/",
abstract = "To adopt behavioral synthesis techniques in existing
design flows, the synthesis methodology must provide
the designer with a mechanism to specify a component's
interface timing. This will permit pre- and
postsynthesis validation through cosimulation with
other subsystems or even through formal verification.
In control-flow dominated designs, additional timing
constraints will result in a complex
specification/constraint system for which the
scheduling problem has been shown to be NP-complete. In
this article, we present a mathematical framework for
solving a special instance of the scheduling problem in
control-flow dominated behavioral VHDL descriptions
given that the timing of I/O signals has been
completely or partially specified. It is based on a
code-transformation approach that fully preserves the
VHDL semantics. The scheduling problem is mapped onto
an integer linear program (ILP) solvable in polynomial
time assuming a restricted partial order on selected
statements. It captures both control-flow and timing
constraints in a single model and also exploits
dataflow information to optimize the statement sequence
across basic block boundaries.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design (B.5.1): {\bf Control design}; Hardware ---
Register-Transfer-Level Implementation --- Design Aids
(B.5.2): {\bf Automatic synthesis}; Hardware ---
Register-Transfer-Level Implementation --- Design Aids
(B.5.2): {\bf Optimization}",
}
@Article{Freund:1997:CEA,
author = "L. Freund and M. Israel and F. Rousseau and J. M.
Berg{\'e} and M. Auguin and C. Belleudy and G.
Gogniat",
title = "A codesign experiment in acoustic echo cancellation
{GMDF}",
journal = j-TODAES,
volume = "2",
number = "4",
pages = "365--383",
month = oct,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p365-freund/p365-freund.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p365-freund/",
abstract = "Continuous advances in processor and ASIC technologies
enable the integration of more and more complex
embedded systems. Embedded systems have become
commonplace in recent years. Since their
implementations generally require the use of
heterogeneous resources (e.g., processor cores, ASICs)
in one system with hard design constraints, the
importance of hardware/software codesign methodologies
increases steadily. HW/SW codesign approaches consist
generally of HW/SW partitioning and scheduling,
constrained code generation, and hardware and interface
synthesis. This article presents the codesign of an
industrial experiment in acoustic echo cancellation
(GMDF algorithm); and emphasizes the partitioning and
communication synthesis steps. This experiment brings
to light interesting problems such as data and program
distribution between system memories and the modeling
of communications in the partitioning process.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Computer Systems Organization --- Special-Purpose and
Application-Based Systems (C.3)",
}
@Article{Panda:1997:MDO,
author = "Preeti Ranjan Panda and Nikil D. Dutt and Alexandru
Nicolau",
title = "Memory data organization for improved cache
performance in embedded processor applications",
journal = j-TODAES,
volume = "2",
number = "4",
pages = "384--409",
month = oct,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p384-panda/p384-panda.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p384-panda/",
abstract = "Code generation for embedded processors opens up the
possibility for several performance optimization
techniques that have been ignored by traditional
compilers due to compilation time constraints. We
present techniques that take into account the
parameters of the data caches for organizing scalar and
array variables declared in embedded code into memory,
with the objective of improving data cache performance.
We present techniques for clustering variables to
minimize compulsory cache misses, and for solving the
memory assignment problem to minimize conflict cache
misses. Our experiments with benchmark code kernels
from DSP and other domains on the CW4001 embedded
processor from LSI Logic indicate significant
improvements in data cache performance by the
application of our memory organization technique.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Performance; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "cache memory; data cache; memory synthesis; system
design; system synthesis",
subject = "Hardware --- Memory Structures --- Design Styles
(B.3.2): {\bf Cache memories}; Software --- Programming
Languages --- Processors (D.3.4): {\bf Compilers}",
}
@Article{Tomiyama:1997:CPT,
author = "Hiroyuki Tomiyama and Hiroto Yasuura",
title = "Code placement techniques for cache miss rate
reduction",
journal = j-TODAES,
volume = "2",
number = "4",
pages = "410--429",
month = oct,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-4/p410-tomiyama/p410-tomiyama.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-4/p410-tomiyama/",
abstract = "In the design of embedded systems with cache memories,
it is important to minimize the cache miss rates to
reduce power consumption of the systems as well as
improve the performance. In this article, we propose
two code placement methods (a simplified method and a
refined one) to reduce miss rates of instruction
caches. We first define a simplified code placement
problem without an attempt to minimize the code size.
The problem is formulated as an integer linear
programming (ILP) problem, by which an optimal
placement can be found. Experimental results show that
the simplified method reduces cache misses by an
average of 30\% (max. 77\%). However, the code size
obtained by the simplified method tends to be large,
which inevitably leads to a larger memory size. In
order to overcome this limitation, we further propose a
refined code placement method in which the code size
provided by the system designers must be satisfied. The
effectiveness of the refined method is also
demonstrated.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Software --- Programming Languages --- Processors
(D.3.4): {\bf Code generation}; Hardware --- Control
Structures and Microprogramming --- Microprogram Design
Aids (B.1.4): {\bf Languages and compilers}; Software
--- Programming Languages --- Processors (D.3.4): {\bf
Optimization}; Hardware --- Control Structures and
Microprogramming --- Microprogram Design Aids (B.1.4):
{\bf Optimization}",
}
@Article{Johnson:1998:MAS,
author = "E. W. Johnson and J. B. Brockman",
title = "Measurement and analysis of sequential design
processes",
journal = j-TODAES,
volume = "3",
number = "1",
pages = "1--20",
month = jan,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p1-johnson/p1-johnson.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p1-johnson/",
abstract = "As design processes continue to increase in complexity
it is important to base process-improvement decisions
on quantitative analysis. We describe the development
of an analytical approach for evaluating sequential
design-process completion time and for determining the
sensitivities of design time with respect to individual
task durations and transition probabilities. Techniques
are also detailed for collecting process metadata and
calibrating a design process model. Example
applications illustrate the use of the methodology in
analyzing and improving software and hardware design
processes.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Documentation; Human Factors; Management;
Measurement",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "management science; sensitivity analysis; workflow",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6); Computing Milieux --- Computers and Education
--- Computer and Information Science Education
(K.3.2)",
}
@Article{Khordoc:1998:SVA,
author = "K. Khordoc and E. Cerny",
title = "Semantics and verification of action diagrams with
linear timing",
journal = j-TODAES,
volume = "3",
number = "1",
pages = "21--50",
month = jan,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p21-khordoc/p21-khordoc.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p21-khordoc/",
abstract = "Specifications containing linear timing constraints,
such as found in action diagrams (timing diagrams)
defining interface behaviors, are often used in
practice. Although efficient $ O(n^3) $ shortest path
algorithms exist for computing the minimum and maximum
time distances between actions, subject to the timing
constraints, there is so far no accurate method that
can decide (a) whether a specification of this kind is
realizable (i.e., can be simulated by a causal system),
and (b) given the action diagrams of the interfaces of
two or more communicating systems, whether the systems
implementing such independent specifications will
correctly interoperate (i.e., satisfy the respective
protocols and timing assumptions). First we illustrate
the weakness of existing action diagram verification
techniques: the causality issue is not addressed, and
the proposed methods to answer the compatibility
(interoperability) question yield false negative
answers in many practical situations. We then define
the meaning of causality in an action diagram
specification and state a set of sufficient conditions
for causality to hold. This development then leads to
an exact procedure for the verification of the
interface compatibility of communicating action
diagrams. The results are illustrated on a practical
example.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Theory; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "causality; compatibility of interfaces; hardware
interfaces; timing diagrams; timing verification",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2); Software --- Software Engineering ---
Requirements/Specifications (D.2.1)",
}
@Article{Liao:1998:NVC,
author = "S. Liao and K. Keutzer and S. Tjiang and S. Devadas",
title = "A new viewpoint on code generation for directed
acyclic graphs",
journal = j-TODAES,
volume = "3",
number = "1",
pages = "51--75",
month = jan,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p51-liao/p51-liao.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p51-liao/",
abstract = "We present a new viewpoint on code generation for
directed acyclic graphs (DAGs). Our formulation is
based on {\em binate covering}, the problem of
satisfying, with minimum cost, a set of disjunctive
clauses, and can take into account commutativity of
operators and of the machine model. An important
contribution of this work is a set of necessary and
sufficient conditions for a valid schedule to be
derived, based on the notion of {\em worms\/} and {\em
worm-partitions}. This set of conditions can be
compactly expressed with clauses that relate scheduling
to code selection. For the case of one-register
machines, we can derive clauses that lead to generation
of optimal code for the DAG. Recent advances in exact
binate covering algorithms allow us to use this
strategy to generate optimal code for large basic
blocks. The optimal code generated by our algorithm
results in significant reductions in overall code
size.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "binate covering; code generation; directed acyclic
graphs",
subject = "Software --- Programming Languages --- Processors
(D.3.4); Mathematics of Computing --- Discrete
Mathematics --- Graph Theory (G.2.2)",
}
@Article{Shi:1998:CCT,
author = "C.-J. Shi and J. A. Brzozowski",
title = "Cluster-cover: a theoretical framework for a class of
{VLSI-CAD} optimization problems",
journal = j-TODAES,
volume = "3",
number = "1",
pages = "76--107",
month = jan,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-1/p76-shi/p76-shi.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-1/p76-shi/",
abstract = "This article introduces a mathematical framework
called cluster-cover. We show that this framework
captures the combinatorial structure of a class of VLSI
design optimization problems, including two-level logic
minimization, constrained encoding, multilayer
topological planar routing, application timing
assignment for delay-fault testing, and minimization of
monitoring logic for BIST enhancement. These apparently
unrelated problems can all be cast into two
metaproblems in our framework: finding a maximum
cluster and finding a minimum cover. We describe
paradigms for developing algorithms for these problems.
First, a simple heuristic called greedy peeling is
presented and characterized. We derive sufficient
conditions that guarantee optimum solutions by greedy
peeling. We generalize the performance analysis of a
multilayer topological planar routing heuristic to
greedy peeling for the general cluster-cover problems.
We propose a performance bound of greedy set covering
that can be computed efficiently for a given problem
instance; this bound is much tighter than the
previously known bounds. Second, prime covering ---
originally developed for logic minimization --- is
generalized to finding exact solutions for
cluster-cover problems. Previously, only the connection
between logic minimization and constrained encoding was
known.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "cluster-cover; logic minimization; NP-completeness;
self-checking logic design; state assignment;
topological routing",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3);
Hardware --- Integrated Circuits --- General (B.7.0);
Theory of Computation --- Analysis of Algorithms and
Problem Complexity --- Nonnumerical Algorithms and
Problems (F.2.2)",
}
@Article{Hsiung:1998:IIC,
author = "Pao-Ann Hsiung and Chung-Hwang Chen and Trong-Yen Lee
and Sao-Jie Chen",
title = "{ICOS}: an intelligent concurrent object-oriented
synthesis methodology for multiprocessor systems",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "109--135",
month = apr,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p109-hsiung/p109-hsiung.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p109-hsiung/",
abstract = "The design of multiprocessor architectures differs
from uniprocessor systems in that the number of
processors and their interconnection must be
considered. This leads to an enormous increase in the
design-space exploration time, which is exponential in
the total number of system components. The methodology
proposed here, called {\em Intelligent Concurrent
Object-Oriented Synthesis\/} (ICOS) methodology, makes
feasible the synthesis of complex multiprocessor
systems through the application of several techniques
that speed up the design process. ICOS is based on {\em
Performance Synthesis Methodology\/} (PSM), a recently
proposed object-oriented system-level design
methodology. Four major techniques: object-oriented
design, fuzzy design-space exploration, concurrent
design, and intelligent reuse of complete subsystems
are integrated in ICOS. First, object-oriented modeling
and design, through the use of object-oriented
relationships and operators, make the whole design
process manageable and maintainable in ICOS. Second,
fuzzy comparison applied to the specializations or
instances of components reduces the exponential growth
of design-space exploration in ICOS. Third, independent
components from different design alternatives are
synthesized in parallel; this design concurrency
shortens the overall design time. Lastly, the
resynthesis of complete subsystems can be avoided
through the application of learning, thus making the
methodology intelligent enough to reuse previous design
configurations. Experiments show that all these applied
techniques contribute to the synthesis efficiency and
the degree of automation in ICOS.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "concurrent object-oriented system-level synthesis;
fuzzy design-space exploration; learning",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6): {\bf Computer-aided design (CAD)}; Computing
Methodologies --- Artificial Intelligence --- Learning
(I.2.6): {\bf Knowledge acquisition}; Computing
Methodologies --- Artificial Intelligence --- Learning
(I.2.6): {\bf Analogies}; Computing Methodologies ---
Artificial Intelligence --- Deduction and Theorem
Proving (I.2.3): {\bf Deduction}; Computer Systems
Organization --- Processor Architectures --- Multiple
Data Stream Architectures (Multiprocessors) (C.1.2)",
}
@Article{Araujo:1998:CGF,
author = "Guido Araujo and Sharad Malik",
title = "Code generation for fixed-point {DSPs}",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "136--161",
month = apr,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p136-araujo/p136-araujo.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p136-araujo/",
abstract = "This paper examines the problem of code-generation for
Digital Signal Processors (DSPs). We make two major
contributions. First, for an important class of DSP
architectures, we propose an optimal $ O(n) $ algorithm
for the tasks of register allocation and instruction
scheduling for expression trees. Optimality is
guaranteed by sufficient conditions derived from a
structural representation of the processor Instruction
Set Architecture (ISA). Second, we develop heuristics
for the case when basic blocks are Directed Acyclic
Graphs (DAGs).",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "code generation; register allocation; scheduling",
subject = "Software --- Programming Languages --- Processors
(D.3.4): {\bf Optimization}; Software --- Programming
Languages --- Processors (D.3.4): {\bf Code
generation}",
}
@Article{Tiruvuri:1998:ELB,
author = "Giri Tiruvuri and Moon Chung",
title = "Estimation of lower bounds in scheduling algorithms
for high-level synthesis",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "162--180",
month = apr,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p162-tiruvuri/p162-tiruvuri.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p162-tiruvuri/",
abstract = "To produce efficient design, a high-level synthesis
system should be able to analyze a variety of
cost-performance tradeoffs. The system can use
lower-bound performance estimate methods to identify
and prune inferior designs without producing complete
designs. We present a lower-bound performance estimate
method that is not only faster than existing methods,
but also produces better lower bounds. In most cases,
the lower bound produced by our algorithm is tight.
\par
Scheduling algorithms such as branch-and-bound need
fast and effective lower-bound estimate methods, often
for a large number of partially scheduled dataflow
graphs, to reduce the search space. We extend our
method to efficiently estimate completion time of
partial schedules. This problem is not addressed by
existing methods in the literature. Our lower-bound
estimate is shown to be very effective in reducing the
size of the search space when used in a
branch-and-bound scheduling algorithm. \par
Our methods can handle multicycle operations, pipelined
functional units, and chaining of operations. We also
present an extension to handle conditional branches. A
salient feature of the extended method is its
applicability to speculative execution as well as
C-select implementation of conditional branches.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation; Measurement;
Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "dynamic programming; high-level synthesis; lower-bound
estimated; scheduling",
subject = "Hardware --- Register-Transfer-Level Implementation
--- General (B.5.0); Hardware --- Performance and
Reliability --- Performance Analysis and Design Aids
(B.8.2); Theory of Computation --- Analysis of
Algorithms and Problem Complexity --- Nonnumerical
Algorithms and Problems (F.2.2): {\bf Sequencing and
scheduling}; Hardware --- Integrated Circuits ---
General (B.7.0)",
}
@Article{Vahid:1998:FPI,
author = "Frank Vahid and Thuy Dm Le and Yu-Chin Hsu",
title = "Functional partitioning improvements over structural
partitioning for packaging constraints and synthesis:
tool performance",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "181--208",
month = apr,
year = "1998",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/290833.290841",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p181-vahid/p181-vahid.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p181-vahid/",
abstract = "Incorporating functional partitioning into a synthesis
methodology leads to several important advantages. In
functional partitioning, we first partition a
functional specification into smaller subspecifications
and then synthesize structure for each, in contrast to
the current approach of first synthesizing structure
for the entire specification and then partitioning that
structure. One advantage is the improvement in I/O
performance and package count, when partitioning among
hardware blocks with size and I/O constraints, such as
FPGAs or blocks within an ASIC. A second advantage is
reduction in synthesis runtimes. We describe these
important advantages, concluding that further research
on functional partitioning can lead to improved results
from synthesis environments.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "behavioral synthesis; functional partitioning;
system-level design",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2): {\bf Automatic synthesis};
Hardware --- Register-Transfer-Level Implementation ---
Design Aids (B.5.2): {\bf Hardware description
languages}; Hardware --- Register-Transfer-Level
Implementation --- Design Aids (B.5.2): {\bf
Optimization}; Computer Applications --- Computer-Aided
Engineering (J.6): {\bf Computer-aided design (CAD)}",
}
@Article{Koch:1998:BBD,
author = "Gernot H. Koch and W. Rosenstiel and U. Kebschull",
title = "Breakpoints and breakpoint detection in source-level
emulation",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "209--230",
month = apr,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p209-koch/p209-koch.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p209-koch/",
abstract = "We present an approach for accelerating the validation
speed of behavioral system descriptions through
hardware emulation. The method allows source-level
debugging of running hardware specified in behavioral
VHDL in a way similar to source-level debugging in
software programming languages. We discuss breakpoints
in source-level emulation and how the circuit generated
by high-level synthesis has to be modified to work with
breakpoints. Breakpoint encoding and detection are
shown in detail. Our approach allows breakpoint
detection by hardware without seriously slowing the
circuit or dramatically increasing its size.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Performance; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "debugging; emulation; high-level synthesis",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Simulation}; Hardware --- Logic Design
--- Design Aids (B.6.3): {\bf VHDL}",
}
@Article{Pomeranz:1998:FTG,
author = "Irith Pomeranz and Sudhakar M. Reddy",
title = "Functional test generation for delay faults in
combinational circuits",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "231--248",
month = apr,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p231-pomeranz/p231-pomeranz.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p231-pomeranz/",
abstract = "We propose a functional fault model for delay faults
in combinational circuits and describe a functional
test generation procedure based on this model. The
proposed method is most suitable when a gate-level
description of the circuit-under-test, necessary for
employing existing gate-level delay fault test
generators, is not available or does not accurately
describe the circuit. It is also suitable for
generating tests in early design stages of a circuit,
before a gate-level implementation is selected. In
addition, it can potentially be employed to supplement
conventional test generators for gate-level circuits to
reduce the cost of handling large numbers of paths. A
parameter called $ \Delta $ is used to control the number of
functional faults targeted and thus the number of tests
generated. If $ \Delta $ is unlimited, the functional test set
detects every robustly testable path delay fault in any
gate-level implementation of the given circuit. An
appropriate subset of tests can be selected once the
implementation is known. The test sets generated for
various values of $ \Delta $ are fault simulated on gate-level
realizations to demonstrate their effectiveness. The
experiments indicate that functional test sets may be
able to identify functions whose realizations have low
path delay fault coverage.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "delay faults; function-robust tests; functional delay
fault model; path delay faults; robust tests",
subject = "Hardware --- Performance and Reliability ---
Reliability, Testing, and Fault-Tolerance (B.8.1);
Hardware --- Integrated Circuits --- Types and Design
Styles (B.7.1)",
}
@Article{Chen:1998:SDI,
author = "X. T. Chen and F. J. Meyer and F. Lombardi",
title = "Structural diagnosis of interconnects by coloring",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "249--271",
month = apr,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p249-chen/p249-chen.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p249-chen/",
abstract = "This paper presents a new approach for diagnosing
shorts in interconnects in which the adjacencies
between nets are known. This structural approach
exploits different graph coloring techniques to
generate a test set with no aliasing and confounding,
i.e., full diagnosis (detection and location) is
accomplished. Initially, a simple coloring approach
based on a greedy condition of the adjacency graph is
proposed for fault detection. Then, the conditions for
aliasing and confounding are analyzed with respect to
the sizes of the possible shorts. These results are
used to generate new colors using a process called
color mixing. Color mixing guarantees that additional
tests, required in order to avoid aliasing/confounding,
will use appropriate codes. The characteristics of
unbalanced/balanced codes for encoding the colors in
the vector-generation process of interconnect diagnosis
are discussed and are proved to yield full diagnosis
using a novel method. An algorithm for full diagnosis
is then presented; this algorithm has an execution
complexity of $ O(\max (N^2, N \times D^3)) $ where $N$
is the number of nets and $D$ is the maximum degree of
the nodes in the adjacency graph. Simulation results
show that the proposed approach requires a smaller
number of test vectors than previous approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "balanced code; diagnosis; graph coloring;
interconnect; syndrome",
subject = "Mathematics of Computing --- Discrete Mathematics ---
Graph Theory (G.2.2); Computer Applications ---
Computer-Aided Engineering (J.6); Hardware ---
Performance and Reliability --- Reliability, Testing,
and Fault-Tolerance (B.8.1)",
}
@Article{Mehta:1998:ESR,
author = "Dinesh P. Mehta",
title = "Estimating the storage requirements of the rectangular
and {L-shaped} corner stitching data structures",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "272--284",
month = apr,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p272-mehta/p272-mehta.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p272-mehta/",
abstract = "This paper proposes a technique for estimating the
storage requirements of the Rectangular Corner
Stitching (RCS) data structure [Ousterhout 1984] and
the L-shaped Corner Stitching (LCS) data structure
[Mehta and Blust 1997] on a given circuit by studying
its (the circuit's) geometric properties. This provides
a method for estimating the storage requirements of a
circuit without having to implement the corner
stitching data structure, which is a tedious and
time-consuming task. This technique can also be used to
estimate the amount of space saved by employing the LCS
data structure over the RCS data structure on a given
circuit.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "corner stitching; data structures; L-shapes; memory
requirements analysis; rectangle; rectilinear
polygons",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Layout}; Data --- Data Storage
Representations (E.2): {\bf Linked representations};
Theory of Computation --- Analysis of Algorithms and
Problem Complexity --- Nonnumerical Algorithms and
Problems (F.2.2): {\bf Geometrical problems and
computations}",
}
@Article{Bhattacharya:1998:ERS,
author = "Subhrajit Bhattacharya and Sujit Dey and Franc
Brglez",
title = "Effects of resource sharing on circuit delay: an
assignment algorithm for clock period optimization",
journal = j-TODAES,
volume = "3",
number = "2",
pages = "285--307",
month = apr,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-2/p285-bhattacharya/p285-bhattacharya.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-2/p285-bhattacharya/",
abstract = "This paper analyzes the effect of resource sharing and
assignment on the clock period of the synthesized
circuit. The assignment phase assigns or binds
operations of the scheduled behavioral description to a
set of allocated resources. We focus on control-flow
intensive descriptions, characterized by the presence
of mutually exclusive paths due to the presence of
nested conditional branches and loops. \par
We show that clustering multiple operations in the same
state of the schedule, possibly leading to chaining of
functional units (FUs) in the RTL circuit, is an
effective way to minimize the total number of clock
cycles, and hence total execution time. We present an
assignment algorithm that is particularly effective for
such design styles by minimizing data chaining and
hence the clock period of the circuit, thereby leading
to further reduction in total execution time.
\par
Existing resource sharing and assignment approaches for
reducing the clock period of the resulting circuit
either increase the resource allocation or use faster
modules, both leading to larger area
requirements. In this paper we show that even when the
type of available resource units and the number of
resource units of each type is fixed, different
assignments may lead to circuits with significant
differences in clock period. \par
We provide a comprehensive analysis of how resource
sharing and assignment introduces long paths in the
circuit. Based on the analysis, we develop an
assignment algorithm that uses a high-level delay
estimator to assign operations to a fixed set of
available resources so as to minimize the clock period
of the resultant circuit, with no or minimal effect on
the area of the circuit. Experimental results on
several conditional-intensive designs demonstrate the
effectiveness of the assignment algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "clock period; high-level synthesis; resource sharing",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2): {\bf Optimization}",
}
@Article{Cabodi:1998:AVB,
author = "Gianpiero Cabodi and Paolo Camurati and Stefano Quer",
title = "Auxiliary variables for {BDD-based} representation and
manipulation of {Boolean} functions",
journal = j-TODAES,
volume = "3",
number = "3",
pages = "309--340",
month = jul,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p309-cabodi/p309-cabodi.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p309-cabodi/",
abstract = "BDDs are the state-of-the-art technique for
representing and manipulating Boolean functions. Their
introduction caused a major leap forward in synthesis,
verification, and testing. However, they are often
unmanageable because of the large amount of nodes. To
attack this problem, we insert auxiliary variables that
decompose monolithic BDDs in smaller ones. This method
works very well for Boolean function representation. As
far as combinational circuits are concerned,
representing their functions is the main issue. Going
into the sequential domain, we focus on traversal
techniques. We show that, once we have Boolean
functions in decomposed form, symbolic manipulations
are viable and efficient. We investigate the relation
between auxiliary variables and static and dynamic
ordering strategies. Experimental evidence shows that
we achieve a certain degree of independence from
variable ordering. Thus, this approach can be an
alternative to dynamic re-ordering. Experimental
results on Boolean function representation, and exact
and approximate forward symbolic traversal of FSMs,
demonstrate the benefits both in terms of memory
requirements and of CPU time.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "binary decision diagrams; finite state machines;
functional decompositions; reachability analysis",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3):
{\bf Verification}",
}
@Article{Cong:1998:BSC,
author = "Jason Cong and Andrew B. Kahng and Cheng-Kok Koh and
C.-W. Albert Tsao",
title = "Bounded-skew clock and {Steiner} routing",
journal = j-TODAES,
volume = "3",
number = "3",
pages = "341--388",
month = jul,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p341-cong/p341-cong.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p341-cong/",
abstract = "We study the minimum-cost bounded-skew routing tree
problem under the pathlength (linear) and Elmore delay
models. This problem captures several engineering
tradeoffs in the design of routing topologies with
controlled skew. Our bounded-skew routing algorithm,
called the BST/DME algorithm, extends the DME algorithm
for exact zero-skew trees via the concept of {\em a
merging region}. For a {\em prescribed topology},
BST/DME constructs a bounded-skew tree (BST) in two
phases: (i) a bottom-up phase to construct a binary
tree of merging regions which represent the loci of
possible embedding points of the internal nodes, and
(ii) a top-down phase to determine the exact locations
of the internal nodes. We present two approaches to
construct the merging regions: (i) the {\em Boundary
Merging and Embedding\/} (BME) method which utilizes
merging points that are restricted to the {\em
boundaries\/} of merging regions, and (ii) the {\em
Interior Merging and Embedding\/} (IME) algorithm which
employs a sampling strategy and a dynamic
programming-based selection technique to consider
merging points that are {\em interior\/} to, as well as
on the boundary of, the merging regions. When the
topology is not prescribed, we propose a new {\em
Greedy\/}-BST/DME algorithm which combines the merging
region computation with topology generation. The
Greedy-BST/DME algorithm very closely matches the best
known heuristics for the zero-skew case and for the
unbounded-skew case (i.e., the Steiner minimal tree
problem). Experimental results show that our BST
algorithms can produce a set of routing solutions with
smooth skew and wire length tradeoffs.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "(inter)connection; boundary merging and embedding;
bounded-skew; clock tree; Elmore delay; interior
merging and embedding; low power; merging region;
merging segment; pathlength delay; Steiner tree;
synchronization; VLSI; zero-skew",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Placement and routing}; Computer
Applications --- Computer-Aided Engineering (J.6): {\bf
Computer-aided design (CAD)}",
}
@Article{Jone:1998:CAD,
author = "Wen-Ben Jone and K. S. Tsai",
title = "Confidence analysis for defect-level estimation of
{VLSI} random testing",
journal = j-TODAES,
volume = "3",
number = "3",
pages = "389--407",
month = jul,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p389-jone/p389-jone.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p389-jone/",
abstract = "The defect level in circuit testing is the percentage
of circuits, such as chips, that are defective and
shipped for use after testing. Our previously published
results showed that the defect level of circuit
fabrication and testing should be a probability
distribution, rather than a single value, and the
concept of confidence degree was proposed [Gondalia et
al. 1993; Jone et al. 1995]. In this work, defect level
is represented by a confidence interval which is more
conventional and easier to interpret. The point
estimate of defect level analysis and conditions to
avoid meaningless confidence intervals are also
investigated. Methods for adaptive random test length
determination driven by different confidence intervals
or interval length are proposed to meet both test
requirements and test costs tradeoff. Finally, a
complete test plan that can direct the test flow from
fabrication infancy to maturity is suggested.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Experimentation; Measurement; Performance;
Reliability",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "defect level analysis; random testing; test confidence
analysis; test quality; VLSI testing",
subject = "Hardware --- Performance and Reliability ---
Reliability, Testing, and Fault-Tolerance (B.8.1)",
}
@Article{Mathur:1998:RAE,
author = "Anmol Mathur and Ali Dasdan and Rajesh K. Gupta",
title = "Rate analysis for embedded systems",
journal = j-TODAES,
volume = "3",
number = "3",
pages = "408--436",
month = jul,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p408-mathur/p408-mathur.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p408-mathur/",
abstract = "Embedded systems consist of interacting components
that are required to deliver a specific functionality
under constraints on execution rates and relative time
separation of the components. In this article, we model
an embedded system using concurrent processes
interacting through synchronization. We assume that
there are rate constraints on the execution rates of
processes imposed by the designer or the environment of
the system, where the execution rate of a process is
the number of its executions per unit time. We address
the problem of computing bounds on the execution rates
of processes constituting an embedded system, and
propose an interactive rate analysis framework. As part
of the rate analysis framework we present an efficient
algorithm for checking the consistency of the rate
constraints. Bounds on the execution rate of each
process are computed using an efficient algorithm based
on the relationship between the execution rate of a
process and the maximum mean delay cycles in the
process graph. Finally, if the computed rates violate
some of the rate constraints, some of the processes in
the system are redesigned using information from the
rate analysis step. This rate analysis framework is
implemented in a tool called RATAN. We illustrate by an
example how RATAN can be used in an embedded system
design.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Performance; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "average execution rate; concurrent system modeling;
embedded systems; interactive rate violation debugging;
rate analysis; rate constraints",
subject = "Computer Systems Organization --- Performance of
Systems (C.4): {\bf Modeling techniques}; Computer
Systems Organization --- Performance of Systems (C.4):
{\bf Performance attributes}; Computer Systems
Organization --- Special-Purpose and Application-Based
Systems (C.3): {\bf Real-time and embedded systems};
Computer Systems Organization --- Performance of
Systems (C.4): {\bf Design studies}",
}
@Article{Pan:1998:OCP,
author = "Peichen Pan and C. L. Liu",
title = "Optimal clock period {FPGA} technology mapping for
sequential circuits",
journal = j-TODAES,
volume = "3",
number = "3",
pages = "437--462",
month = jul,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p437-pan/p437-pan.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p437-pan/",
abstract = "We study the technology mapping problem for sequential
circuits for look-up table (LUT) based field
programmable gate arrays (FPGAs). Existing approaches
to the problem simply remove the flip-flops (FFs), then
map the remaining combinational logic, and finally put
the FFs back. These approaches ignore the sequential
nature of a circuit and assume the positions of the FFs
are fixed. However, FFs in a sequential circuit can be
repositioned by a functionality-preserving
transformation called retiming. As a result, existing
approaches can only consider a very small portion of
the available solution space. We propose in this paper
a novel approach to the technology mapping problem. In
our approach, retiming is integrated into the
technology mapping process so as to consider the full
solution space. We then present a polynomial technology
mapping algorithm that, for a given circuit, produces a
mapping solution with the minimum clock period among
all possible ways of retiming. The effectiveness of the
algorithm is also demonstrated experimentally.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "clock period; field-programmable gate arrays; FPGAs;
logic replication; look-up tables; retiming; sequential
synthesis; technology mapping",
subject = "Hardware --- Logic Design --- Design Styles (B.6.1):
{\bf Sequential circuits}; Hardware --- Logic Design
--- Design Aids (B.6.3): {\bf Automatic synthesis};
Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
Optimization}",
}
@Article{Riepe:1998:EBD,
author = "Michael A. Riepe and Karem A. Sakallah",
title = "The edge-based design rule model revisited",
journal = j-TODAES,
volume = "3",
number = "3",
pages = "463--486",
month = jul,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p463-riepe/p463-riepe.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p463-riepe/",
abstract = "A model for integrated circuit design rules based on
rectangle edge constraints has been proposed by
Jeppson, Christensson, and Hedenstierna. This model
appears to be the most rigorous proposed to date for
the description of such edge-based design rules.
However, in certain rare circumstances their model is
unable to express the correct design rule when the
constrained edges are not adjacent in the layout. We
introduce a new notation, called an edge path, which
allows us to extend their model to allow for
constraints between edges separated by an arbitrary
number of intervening edges. Using this notation we
enumerate all edge paths that are required to correctly
model the original design rule macros of the JCH model,
and prove that these macros are sufficient to model the
most common rules. We also show how this notation
allows us to directly specify many kinds of conditional
design rules that required ad hoc specification under
the JCH model.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Theory; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design rule checking; design rules; layout
verification",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6); Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Verification}",
}
@Article{Su:1998:EFL,
author = "Alan Su and Yu-Chin Hsu and Ta-Yung Liu and Mike
Tien-Chien Lee",
title = "Eliminating false loops caused by sharing in control
path",
journal = j-TODAES,
volume = "3",
number = "3",
pages = "487--495",
month = jul,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p487-su/p487-su.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p487-su/",
abstract = "In high-level synthesis, resource sharing may result
in a circuit containing false loops that create great
difficulty in timing validation during the design
sign-off phase. It is hence desirable to avoid
generating any false loops in a synthesized circuit.
Previous work [Stok 1992; Huang et al. 1995] considered
mainly data path sharing for false loop elimination.
However, for a complete circuit with both data path and
control path, false loops can be created due to control
logic sharing. In this article, we present a novel
approach to detect and eliminate the false loops caused
by control logic sharing. An effective filter is
devised to reduce the computational complexity of false
loop detection, which is based on checking the level
numbers that are propagated from data path operators to
inputs and outputs of the control path. Only the
input/output pairs of the control path identified by
the filter are further investigated by traversing into
the data path for false loop detection. A removal
algorithm is then applied to eliminate the detected
false loops, followed by logic minimization to further
optimize the circuit. Experimental results show that
for the nine example circuits we tested, the final
designs after false loop removal and logic minimization
give only slightly larger area than the original ones
that contain false loops.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "control path; false loop",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2): {\bf Automatic synthesis};
Hardware --- Register-Transfer-Level Implementation ---
Design Aids (B.5.2): {\bf Hardware description
languages}; Hardware --- Register-Transfer-Level
Implementation --- Design Aids (B.5.2): {\bf
Optimization}; Hardware --- Register-Transfer-Level
Implementation --- Design Aids (B.5.2): {\bf
Verification}",
}
@Article{Zhou:1998:ORR,
author = "Hai Zhou and D. F. Wong",
title = "Optimal river routing with crosstalk constraints",
journal = j-TODAES,
volume = "3",
number = "3",
pages = "496--514",
month = jul,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-3/p496-zhou/p496-zhou.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-3/p496-zhou/",
abstract = "With the increasing density of VLSI circuits, the
interconnection wires are being packed even closer.
This has increased the effect of interaction among
these wires on circuit performance and hence, the
importance of controlling crosstalk. In this article,
we consider river routing with crosstalk constraints.
Given the positions of the pins in a single-layer
routing channel and the maximum tolerable crosstalk
between each pair of neighboring nets, we give a
polynomial time algorithm to decide whether there is a
feasible river routing solution and produce one with
minimum crosstalk when it is feasible.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "crosstalk; river routing",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Placement and routing}; Mathematics of
Computing --- Discrete Mathematics --- Graph Theory
(G.2.2): {\bf Network problems}; Computer Applications
--- Computer-Aided Engineering (J.6): {\bf
Computer-aided design (CAD)}",
}
@Article{Passerone:1998:MRS,
author = "C. Passerone and C. Sansoe and L. Lavagno and R.
McGeer and J. Martin and R. Passerone and A.
Sangiovanni-Vincentelli",
title = "Modeling reactive systems in {Java}",
journal = j-TODAES,
volume = "3",
number = "4",
pages = "515--523",
month = oct,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p515-passerone/p515-passerone.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p515-passerone/",
abstract = "We present an application of the Java\TM{} programming
language to specify and implement reactive real-time
systems. We have developed and tested a collection of
classes and methods to describe concurrent modules and
their asynchronous communication by means of signals.
The control structures are closely patterned after
those of the synchronous language {\em Esterel},
succinctly describing concurrency, sequencing and
preemption. We show the user-friendliness and
efficiency of the proposed technique by using an
example from the automotive domain.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Languages; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "high level design; Java; prototyping; simulation",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3):
{\bf Hardware description languages}; Computing
Methodologies --- Simulation and Modeling --- Model
Validation and Analysis (I.6.4); Computer Applications
--- Physical Sciences and Engineering (J.2): {\bf
Electronics}; Computer Applications --- Computer-Aided
Engineering (J.6): {\bf Computer-aided design (CAD)}",
}
@Article{Wang:1998:MEV,
author = "Li-C. Wang and Magdy S. Abadir and Jing Zeng",
title = "On measuring the effectiveness of various design
validation approaches for {PowerPC} microprocessor
embedded arrays",
journal = j-TODAES,
volume = "3",
number = "4",
pages = "524--532",
month = oct,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p524-wang/p524-wang.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p524-wang/",
abstract = "Design validation for embedded arrays remains as a
challenging problem in today's microprocessor design
environment. At Somerset, validation of array designs
relies on both formal verification and vector
simulation. Although several methods for array design
validation have been proposed and had great success
[Ganguly et al. 1996; Pandey et al. 1996, 1997; Wang
and Abadir 1997], little evidence has been reported for
the effectiveness of these methods with respect to the
detection of design errors. In this paper, we measure
the effectiveness of different validation approaches
based on automatic design error injection and
simulation. The technique provides a systematic way to
evaluate various validation approaches at both logic
and transistor levels. Experimental results on recent
PowerPC microprocessor arrays will be discussed and
reported.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "assertion test generation; ATPG; design error model;
logic verification; symbolic trajectory evaluation;
validation",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3):
{\bf Simulation}; Hardware --- Logic Design --- Design
Aids (B.6.3): {\bf Verification}; Hardware ---
Integrated Circuits --- Design Aids (B.7.2): {\bf
Simulation}; Hardware --- Integrated Circuits ---
Design Aids (B.7.2): {\bf Verification}",
}
@Article{Dasdan:1998:TDD,
author = "Ali Dasdan and Dinesh Ramanathan and Rajesh K. Gupta",
title = "A timing-driven design and validation methodology for
embedded real-time systems",
journal = j-TODAES,
volume = "3",
number = "4",
pages = "533--553",
month = oct,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p533-dasdan/p533-dasdan.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p533-dasdan/",
abstract = "We address the problem of timing constraint derivation
and validation for reactive and real-time embedded
systems. We assume that such a system is structured
into its tasks, and the structure is modeled using a
task graph. Our solution uses the timing behavior
committed by the environment to the system first to
derive the timing constraints on the system's internal
behavior and then use them to derive and validate the
timing constraints on the system's external behavior.
Our solution consists of the following contributions: a
generalized task graph model, a comprehensive
classification of timing constraints, algorithms for
derivation and validation of timing constraints of the
system modeled in the generalized task graph model, a
codesign methodology that combines the model and the
algorithms, and the implementation of this methodology
in a tool called RADHA-RATAN. The main advantages of
our solution are that it simplifies the problem of
ensuring timing correctness of the system by reducing
the complexity of the problem from system level to task
level, and that it makes the codesign methodology
timing-driven in that our solution makes it possible to
maintain a handle on the system's timing correctness
from very early stages in the system's design flow.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Performance; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "performance verification; period assignment; period
derivation; rate assignment; rate derivation;
requirements analysis; system-level design; timing
analysis; timing-driven codesign",
subject = "Computer Systems Organization --- General (C.0): {\bf
Systems specification methodology}; Computer Systems
Organization --- Special-Purpose and Application-Based
Systems (C.3): {\bf Real-time and embedded systems};
Computer Systems Organization --- Performance of
Systems (C.4): {\bf Modeling techniques}; Computer
Systems Organization --- Performance of Systems (C.4):
{\bf Performance attributes}; Software --- Operating
Systems --- Organization and Design (D.4.7): {\bf
Real-time systems and embedded systems}; Software ---
Operating Systems --- Performance (D.4.8): {\bf
Modeling and prediction}; Computer Applications ---
Computer-Aided Engineering (J.6): {\bf Computer-aided
design (CAD)}",
}
@Article{Rajan:1998:ASD,
author = "S. P. Rajan and M. Fujita and K. Yuan and M. T-C.
Lee",
title = "{ATM} switch design by high-level modeling, formal
verification and high-level synthesis",
journal = j-TODAES,
volume = "3",
number = "4",
pages = "554--562",
month = oct,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p554-rajan/p554-rajan.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p554-rajan/",
abstract = "Asynchronous Transfer Mode (ATM) has emerged as a
backbone for high-speed broadband telecommunication
networks. In this paper, we present ATM switch design,
starting from a parametric high-level model and
debugging the model using a combination of formal
verification and simulation. The model has been used to
synthesize ATM switches according to customers'
choices, by choosing concrete values for each of the
generic parameters. We provide a pragmatic combination
of simulation, model checking, and theorem proving to
gain confidence in the ATM switch design correctness.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "ATM switch; high-level design; synthesis;
verification",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6): {\bf Computer-aided design (CAD)}",
}
@Article{Huggins:1998:SVP,
author = "James K. Huggins and David {Van Campenhout}",
title = "Specification and verification of pipelining in the
{ARM2} {RISC} microprocessor",
journal = j-TODAES,
volume = "3",
number = "4",
pages = "563--580",
month = oct,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p563-huggins/p563-huggins.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p563-huggins/",
abstract = "Gurevich Abstract State Machines (ASMs) provide a
sound mathematical basis for the specification and
verification of systems. An application of the ASM
methodology to the verification of a pipelined
microprocessor (an ARM2 implementation) is described.
Both the sequential execution model and final pipelined
model are formalized using ASMs. A series of
intermediate models are introduced that gradually
expose the complications of pipelining. The first
intermediate model is proven equivalent to the
sequential model in the absence of structural, control,
and data hazards. In the following steps, these
simplifying assumptions are lifted one by one, and the
original proof is refined to establish the equivalence
of each intermediate model with the sequential model,
leading ultimately to a full proof of equivalence of
the sequential and pipelined models.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "abstract state machines; ARM processor; design
verification; formal verification; pipelined
processors; pipelining",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2); Computer Systems Organization
--- General (C.0): {\bf Systems specification
methodology}; Computer Systems Organization ---
Processor Architectures --- Single Data Stream
Architectures (C.1.1)",
}
@Article{VanCampenhout:1998:HLD,
author = "D. {Van Campenhout} and H. Al-Asaad and J. P. Hayes
and T. Mudge and R. B. Brown",
title = "High-level design verification of microprocessors via
error modeling",
journal = j-TODAES,
volume = "3",
number = "4",
pages = "581--599",
month = oct,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p581-campenhout/p581-campenhout.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p581-campenhout/",
abstract = "A design verification methodology for microprocessor
hardware based on modeling design errors and generating
simulation vectors for the modeled errors via physical
fault testing techniques is presented. We have
systematically collected design error data from a
number of microprocessor design projects. The error
data is used to derive error models suitable for design
verification testing. A class of basic error models is
identified and shown to yield tests that provide good
coverage of common error types. To improve coverage for
more complex errors, a new class of conditional error
models is introduced. An experiment to evaluate the
effectiveness of our methodology is presented. Single
actual design errors are injected into a correct
design, and it is determined if the methodology will
generate a test that detects the actual errors. The
experiment has been conducted for two microprocessor
designs and the results indicate that very high
coverage of actual design errors can be obtained with
test sets that are complete for a small number of
synthetic error models.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design errors; design verification; error modeling",
subject = "Hardware --- General (B.0); Hardware ---
Register-Transfer-Level Implementation --- Design Aids
(B.5.2)",
}
@Article{Hasteer:1998:EEC,
author = "G. Hasteer and A. Mathur and P. Banerjee",
title = "Efficient equivalence checking of multi-phase designs
using phase abstraction and retiming",
journal = j-TODAES,
volume = "3",
number = "4",
pages = "600--625",
month = oct,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p600-hasteer/p600-hasteer.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p600-hasteer/",
abstract = "Equivalence checking of finite state machines (FSMs)
traditionally assumes single phase machines where a
single clock (implicit or explicit) synchronizes the
state of the FSM. We extend the equivalence checking
paradigm to FSMs with multi-phase clocks. Such designs
are becoming increasingly popular in high performance
microprocessors since they result in lower
synchronization overhead. In addition, aggressive
pipelining and the use of ``sparse'' encodings results
in designs where the ratio of steady states to the
total state space is very low. In this paper, we show
that automatically transforming such designs to ones
that have more ``dense'' encodings can result in
significant benefits in using implicit BDD-based
techniques for their verification. We explore two such
techniques: {\em phase abstraction\/} and {\em
retiming\/} and demonstrate their utility in the
context of FSM equivalence checking. The main
contributions of our work are: \par
--We show that a multi-phase FSM can be transformed to
a functionally equivalent one phase FSM and this phase
abstraction leads to significant improvement in the
size of FSMs that can be checked for equivalence.
\par
--We show that min-latch retiming preserves equivalence
and can be performed efficiently in multi-phase
designs, even when latch borrowing and discarding is
allowed at the primary inputs and outputs. \par
--We demonstrate the utility of our approach on several
controller FSMs from the industry.",
acknowledgement = ack-nhfb,
annote = "Article title page incorrectly has Bannerjee instead
of Banerjee.",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Performance; Theory;
Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "binary decision diagram; encoding density;
multi-phase FSM; product machine; sequential hardware
equivalence; steady states",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3);
Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
Verification}; Computer Applications --- Computer-Aided
Engineering (J.6): {\bf Computer-aided design (CAD)}",
}
@Article{Benso:1998:ELC,
author = "A. Benso and P. Prinetto and M. Rebaudengo and M.
{Sonza Reorda}",
title = "{EXFI}: a low-cost fault injection system for embedded
microprocessor-based boards",
journal = j-TODAES,
volume = "3",
number = "4",
pages = "626--634",
month = oct,
year = "1998",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1998-3-4/p626-benso/p626-benso.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1998-3-4/p626-benso/",
abstract = "Evaluating the faulty behavior of low-cost embedded
microprocessor-based boards is an increasingly
important issue, due to their adoption in many safety
critical systems. The architecture of a complete Fault
Injection environment is proposed, integrating a module
for generating a collapsed list of faults, and another
for performing their injection and gathering the
results. To address this issue, the paper describes a
software-implemented Fault Injection approach based on
the Trace Exception Mode available in most
microprocessors. The authors describe EXFI, a
prototypical system implementing the approach, and
provide data about some sample benchmark applications.
The main advantages of EXFI are the low cost, the good
portability, and the high efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation; Measurement",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "fault coverage; fault injection; microprocessor
systems; software-implemented fault injection; trace
exception mode",
subject = "Hardware --- Performance and Reliability ---
Reliability, Testing, and Fault-Tolerance (B.8.1);
Hardware --- Performance and Reliability ---
Performance Analysis and Design Aids (B.8.2)",
}
@Article{Gasteier:1999:BBC,
author = "Michael Gasteier and Manfred Glesner",
title = "Bus-based communication synthesis on system level",
journal = j-TODAES,
volume = "4",
number = "1",
pages = "1--11",
month = jan,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p1-gasteier/p1-gasteier.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p1-gasteier/",
abstract = "In this article, we present an approach to automatic
generation of communication topologies for statically
scheduled systems of subsystems. Given a specification
containing a set of processes that communicate via
abstract send and receive functions, we show how a
cost-efficient communication topology consisting of one
or more buses without arbitration scheme can be set up
for such applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "bus generation; bus without arbitration; communication
synthesis; statically scheduled systems; transfer
scheduling",
subject = "Hardware --- Input/Output and Data Communications ---
Interconnections (Subsystems) (B.4.3)",
}
@Article{Liao:1999:TCB,
author = "Stan Liao and Srinivas Devadas and Kurt Keutzer",
title = "A text-compression-based method for code size
minimization in embedded systems",
journal = j-TODAES,
volume = "4",
number = "1",
pages = "12--38",
month = jan,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p12-liao/p12-liao.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p12-liao/",
abstract = "We address the problem of code-size minimization in
VLSI systems with embedded DSP processors. Reducing
code size reduces the production cost of embedded
systems. \par
We use data-compression methods to develop code-size
minimization strategies. In our framework, the
compressed program consists of a skeleton and a
dictionary. We show that the dictionary can be computed
by solving a set-covering problem derived from the
original program. To execute the compressed code, we
describe two methods that have different performance
characteristics and different degrees of freedom in
compressing the code. We also address performance
considerations, and show that they can be incorporated
easily into the set-covering formulation, and present
experimental results obtained with Texas Instruments'
optimizing TMS320C25 compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Experimentation; Measurement;
Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "code size optimization; compression",
subject = "Software --- Programming Languages --- Processors
(D.3.4): {\bf Compilers}; Software --- Programming
Languages --- Processors (D.3.4): {\bf Optimization};
Data --- Coding and Information Theory (E.4): {\bf Data
compaction and compression}",
}
@Article{Song:1999:CDP,
author = "Xiaoyu Song and Yuke Wang",
title = "On the crossing distribution problem",
journal = j-TODAES,
volume = "4",
number = "1",
pages = "39--51",
month = jan,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p39-song/p39-song.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p39-song/",
abstract = "VLSI layout design is typically decomposed into four
steps: {\em placement, global routing, routing region
definition, and detailed routing}. The crossing
distribution problem occurs prior to detailed routing
[Groenveld 1989; Marek-Sadowska and Sarrafzadeh 1995;
Wang and Shung 1992]. A {\em crossing\/} is defined as
the intersection of two nets. The problem of net
crossing distribution is important in layout design,
such as design of dense chips, multichip modules (MCM),
critical net routing, and analog circuits [Groenveld
1989; Sarrafzadeh 1995; Wang and Shung 1992]. It is
observed that nets crossing each other are more
difficult to route than those that do not cross. The
layout of crossing nets has to be realized in more than
two layers and requires a larger number of {\em vias}.
In this paper we study the crossing distribution
problem of two-terminal nets between two regions. We
present an optimal $ O(n^2) $ time algorithm for
two-sided nets, where $n$ is the number of nets. Our
results are superior to previous ones [Marek-Sadowska
and Sarrafzadeh 1995; Wang and Shung 1992]. We give an
optimal $ O(n^2) $ time algorithm for the crossing
distribution problem with one-sided nets. We solve
optimally the complete version of the crossing
distribution problem for two-terminal nets in two
regions that has not been studied before.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "crossings; VLSI layout",
subject = "Hardware --- Integrated Circuits (B.7); Hardware ---
Integrated Circuits --- Design Aids (B.7.2); Theory of
Computation --- Analysis of Algorithms and Problem
Complexity (F.2); Theory of Computation --- Analysis of
Algorithms and Problem Complexity --- Nonnumerical
Algorithms and Problems (F.2.2): {\bf Sequencing and
scheduling}",
}
@Article{Tseng:1999:TLL,
author = "Jyh-Mou Tseng and Jing-Yang Jou",
title = "Two-level logic minimization for low power",
journal = j-TODAES,
volume = "4",
number = "1",
pages = "52--69",
month = jan,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p52-tseng/p52-tseng.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p52-tseng/",
abstract = "In this paper we present a complete Boolean method for
reducing the power consumption in two-level
combinational circuits. The two-level logic optimizer
performs the logic minimization for low power targeting
static PLA, general logic gates, and dynamic PLA
implementations. We modify the Espresso algorithm by
adding our heuristics, which bias logic minimization
toward lowering power dissipation. In our heuristics,
signal probabilities and transition densities are two
important parameters. The experimental results are
promising.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Performance; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "logic synthesis; low power design; programmable logic
array; two-level logic minimization",
subject = "Hardware --- Logic Design --- Design Styles (B.6.1):
{\bf Combinational logic}; Hardware --- Logic Design
--- Design Styles (B.6.1): {\bf Logic arrays}; Hardware
--- Logic Design --- Design Aids (B.6.3): {\bf
Automatic synthesis}; Hardware --- Integrated Circuits
--- Types and Design Styles (B.7.1): {\bf VLSI (very
large scale integration)}",
}
@Article{Vahid:1999:PCT,
author = "Frank Vahid",
title = "Procedure cloning: a transformation for improved
system-level functional partitioning",
journal = j-TODAES,
volume = "4",
number = "1",
pages = "70--96",
month = jan,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p70-vahid/p70-vahid.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p70-vahid/",
abstract = "Functional partitioning assigns the functions of a
system's program-like specification among system
components, such as standard-software and
custom-hardware processors. We introduce a new
transformation, called procedure cloning, that
significantly improves functional partitioning results.
The transformation creates a clone of a procedure for
sole use by a particular procedure caller, so the clone
can be assigned to the caller's processor, which in
turn improves performance through reduced
communication. Heuristics are used to prevent the
exponential size increase that could occur if cloning
were done indiscriminately. We introduce a variety of
cloning heuristics, highlight experiments demonstrating
the improvements obtained using cloning, and compare
the various cloning heuristics.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "behavioral synthesis; embedded systems; functional
partitioning; hardware/software codesign; replication;
system-level design; system-on-a-chip;
transformations",
subject = "Hardware --- General (B.0); Hardware ---
Register-Transfer-Level Implementation --- Design Aids
(B.5.2): {\bf Automatic synthesis}; Hardware ---
Register-Transfer-Level Implementation --- Design Aids
(B.5.2): {\bf Hardware description languages}; Hardware
--- Register-Transfer-Level Implementation --- Design
Aids (B.5.2): {\bf Optimization}; Computer Applications
--- Computer-Aided Engineering (J.6): {\bf
Computer-aided design (CAD)}",
}
@Article{Wang:1999:PRP,
author = "Qi Wang and Sarma B. K. Vrudhula and Gary Yeap and
Shantanu Ganguly",
title = "Power reduction and power-delay trade-offs using logic
transformations",
journal = j-TODAES,
volume = "4",
number = "1",
pages = "97--121",
month = jan,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-1/p97-wang/p97-wang.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-1/p97-wang/",
abstract = "We present an efficient technique to reduce the
switching activity in a technology-mapped CMOS
combinational circuit based on local logic
transformations. The transformations consist of adding
redundant connections or gates so as to reduce
switching activity. We describe simple and efficient
procedures, based on logic implication, for identifying
the sources and targets of the redundant connections.
Additionally, we give procedures that permit the
designer to trade off power and delay after the
transformations. Results of experiments on both the
MCNC benchmark circuits and the circuits of a PowerPC
microprocessor chip are given. The results indicate
that significant power reduction of a CMOS
combinational circuit can be achieved with very low
area overhead, delay penalty, and computational cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "CMOS logic; logic optimization; logic synthesis; low
power; power estimation",
subject = "Hardware --- Integrated Circuits --- General (B.7.0);
Hardware --- Logic Design --- Design Styles (B.6.1):
{\bf Combinational logic}",
}
@Article{Kern:1999:FVH,
author = "Christoph Kern and Mark R. Greenstreet",
title = "Formal verification in hardware design: a survey",
journal = j-TODAES,
volume = "4",
number = "2",
pages = "123--193",
month = apr,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p123-kern/p123-kern.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p123-kern/",
abstract = "In recent years, formal methods have emerged as an
alternative approach to ensuring the quality and
correctness of hardware designs, overcoming some of the
limitations of traditional validation techniques such
as simulation and testing. \par
There are two main aspects to the application of formal
methods in a design process: the formal framework used
to specify desired properties of a design and the
verification techniques and tools used to reason about
the relationship between a specification and a
corresponding implementation. We survey a variety of
frameworks and techniques proposed in the literature
and applied to actual designs. The specification
frameworks we describe include temporal logics,
predicate logic, abstraction and refinement, as well as
containment between $ \omega $-regular languages. The
verification techniques presented include model
checking, automata-theoretic techniques, automated
theorem proving, and approaches that integrate the
above methods. \par
In order to provide insight into the scope and
limitations of currently available techniques, we
present a selection of case studies where formal
methods were applied to industrial-scale designs, such
as microprocessors, floating-point hardware, protocols,
memory subsystems, and communications hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "case studies; formal methods; formal verification;
hardware verification; language containment; model
checking; survey; theorem proving",
subject = "General Literature --- Introductory and Survey (A.1);
Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Verification}",
}
@Article{Lee:1999:BBI,
author = "Kuen-Jong Lee and Jing-Jou Tang and Tsung-Chu Huang",
title = "{BIFEST}: a built-in intermediate fault effect sensing
and test generation system for {CMOS} bridging faults",
journal = j-TODAES,
volume = "4",
number = "2",
pages = "194--218",
month = apr,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p194-lee/p194-lee.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p194-lee/",
abstract = "This paper presents BIFEST, an ATPG system that
employs the built-in intermediate voltage test
technique in an efficient ATPG process to deal with
CMOS bridging faults. Fast and accurate calculations of
the intermediate bridging voltages and the variant
threshold tolerance margins on a resistive bridging
fault model are presented. A PODEM-like, PPSFP-based
ATPG process is developed to generate test patterns for
faults that are conventionally logic-testable. The
remaining faults are then dealt with by special
circuits, called built-in intermediate voltage sensors
(BIVSs). By this methodology, almost the same fault
coverage as that employing $ I_{DDQ} $ testing can be
achieved with only logic monitoring required.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation; Reliability",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Integrated Circuits --- General (B.7.0)",
}
@Article{Thornton:1999:BSC,
author = "M. A. Thornton and V. S. S. Nair",
title = "Behavioral synthesis of combinational logic using
spectral-based heuristics",
journal = j-TODAES,
volume = "4",
number = "2",
pages = "219--230",
month = apr,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-2/p219-thornton/p219-thornton.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-2/p219-thornton/",
abstract = "A prototype system developed to convert a behavioral
representation of a Boolean function in OBDD form into
an initial structural representation is described and
experimental results are given. The system produces a
multilevel circuit using heuristic rules based on
properties of a subset of spectral coefficients. Since
the behavioral description is in OBDD form, efficient
methods are used to quickly compute the small subset of
spectral coefficients needed for the application of the
heuristics. The heuristics guide subsequent
decompositions of the OBDD, resulting in an iterative
construction of the structural form. At each stage of
the translation, the form of the decomposition is
chosen in order to achieve optimization goals.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "automatic synthesis; decision diagram; decision
diagrams; design aids; logic design; spectral methods",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3)",
}
@Article{Cheng:1999:CGN,
author = "Wei-Kai Cheng and Youn-Long Lin",
title = "Code generation of nested loops for {DSP} processors
with heterogeneous registers and structural
pipelining",
journal = j-TODAES,
volume = "4",
number = "3",
pages = "231--256",
month = jul,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p231-cheng/p231-cheng.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p231-cheng/",
abstract = "We propose a microcode-optimizing method targeting a
programmable DSP processor. Efficient generation of
microcodes is essential to better utilize the
computation power of a DSP processor. Since most
state-of-the-art DSP processors feature some sort of
irregular architectures and most DSP applications have
nested loop constructs, their code generation is a
nontrivial task. In this paper, we consider two
features frequently found in contemporary DSP
processors --- structural pipelining and heterogeneous
registers. We propose a code generator that performs
instruction scheduling and register allocation
simultaneously. The proposed approach has been
implemented and evaluated using a set of benchmark core
algorithms. Simulation of the generated codes targeted
towards the TI TMS320C40 DSP processor shows that our
system is indeed more effective compared with a
commercial optimizing DSP compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "code generation; DSP",
subject = "Computer Systems Organization --- Special-Purpose and
Application-Based Systems (C.3): {\bf Real-time and
embedded systems}",
}
@Article{Li:1999:PEE,
author = "Yau-Tsun Steven Li and Sharad Malik and Andrew Wolfe",
title = "Performance estimation of embedded software with
instruction cache modeling",
journal = j-TODAES,
volume = "4",
number = "3",
pages = "257--279",
month = jul,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p257-li/p257-li.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p257-li/",
abstract = "Embedded systems generally interact in some way with
the outside world. This may involve measuring sensors
and controlling actuators, communicating with other
systems, or interacting with users. These functions
impose real-time constraints on system design.
Verification of these specifications requires computing
an upper bound on the worst-case execution time (WCET)
of a hardware/software system. Furthermore, it is
critical to derive a tight upper bound on WCET in order
to make efficient use of system resources. \par
The problem of bounding WCET is particularly difficult
on modern processors. These processors use cache-based
memory systems that vary memory access time based on
the dynamic memory access pattern of the program. This
must be accurately modeled in order to tightly bound
WCET. Several analysis methods have been proposed to
bound WCET on processors with instruction caches.
Existing approaches either search all possible program
paths, an intractable problem, or they use highly
pessimistic assumptions to limit the search space. In
this paper we present a more effective method for
modeling instruction cache activity and computing a
tight bound on WCET. The method uses an integer linear
programming formulation and does not require explicit
enumeration of program paths. The method is implemented
in the program {\tt cinderella} and we present some
experimental results of this implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation; Performance; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Computer Systems Organization --- Performance of
Systems (C.4): {\bf Modeling techniques}; Computer
Systems Organization --- Special-Purpose and
Application-Based Systems (C.3): {\bf Real-time and
embedded systems}",
}
@Article{Shi:1999:SSL,
author = "C.-J. Richard Shi and Michael W. Tian",
title = "Simulation and sensitivity of linear analog circuits
under parameter variations by {Robust} interval
analysis",
journal = j-TODAES,
volume = "4",
number = "3",
pages = "280--312",
month = jul,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p280-shi/p280-shi.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p280-shi/",
abstract = "An interval-mathematic approach is presented for
frequency-domain simulation and sensitivity analysis of
linear analog circuits under parameter variations. With
uncertain parameters represented as intervals, bounding
frequency-domain responses is formulated as the problem
of solving systems of linear interval equations. The
formulation is based on a variant of modified nodal
analysis, and is particularly amenable to interval
analysis. Some characterizations of the solution sets of
systems of linear interval equations are derived. With
these characterizations, an elegant and efficient
algorithm is proposed to solve systems of linear
interval equations. While the widely used Monte Carlo
approach requires many circuit simulations to achieve
even moderate accuracy, the computational cost of the
proposed approach is about twice that of one circuit
simulation. The computed response bounds contain
provably, or are usually very close to, the actual
response bounds. Further, sensitivity under parameter
variations can be computed from the response bounds at
minor computational cost. The algorithms are
implemented in SPICE3F5, using sparse-matrix techniques
and tested on several practical analog circuits.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "interval mathematics; process variations; sensitivity;
uncertainty; worst-case analysis",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Simulation}; Hardware --- Integrated
Circuits --- Design Aids (B.7.2): {\bf Verification};
Mathematics of Computing --- Numerical Analysis ---
Numerical Linear Algebra (G.1.3): {\bf Linear systems
(direct and iterative methods)}; Mathematics of
Computing --- Numerical Analysis --- Numerical Linear
Algebra (G.1.3): {\bf Sparse, structured, and very
large systems (direct and iterative methods)}; Computer
Applications --- Computer-Aided Engineering (J.6): {\bf
Computer-aided manufacturing (CAM)}",
}
@Article{Wurth:1999:FMO,
author = "Bernd Wurth and Ulf Schlichtmann and Klaus Eckl and
Kurt J. Antreich",
title = "Functional multiple-output decomposition with
application to technology mapping for lookup
table-based {FPGAs}",
journal = j-TODAES,
volume = "4",
number = "3",
pages = "313--350",
month = jul,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-3/p313-wurth/p313-wurth.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-3/p313-wurth/",
abstract = "Functional decomposition is an important technique for
technology mapping to lookup table-based FPGA
architectures. We present the theory of and a novel
approach to functional disjoint decomposition of
multiple-output functions, in which common subfunctions
are extracted during technology mapping. \par
While a Boolean function usually has a very large
number of subfunctions, we show that not all of them
are useful for multiple-output decomposition. We use a
partition of the set of bound set vertices as the basis
to compute {\em preferable\/} decomposition functions,
which are sufficient for an optimal multiple-output
decomposition. \par
We propose several new algorithms that deal with
central issues of functional multiple-output
decomposition. First, an efficient algorithm to solve
the variable partitioning problem is described. Second,
we show how to implicitly compute all preferable
functions of a single-output function and how to
identify all common preferable functions of a
multiple-output function. Due to implicit computation
in the crucial steps, the algorithm is very efficient.
Experimental results show significant reductions in
area.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation; Performance;
Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "assignable functions; Boolean functions;
computer-aided design of VLSI; decomposition; FPGA
technology; implicit BDD-based methods; mapping
synthesis; multiple-output decomposition; preferable
functions; subfunction sharing gain; subfunction
sharing potential; TOS; variable partitioning for
decomposition",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6); Hardware --- Integrated Circuits --- Types and
Design Styles (B.7.1): {\bf Gate arrays}",
}
@Article{Benini:1999:SSC,
author = "L. Benini and G. {De Micheli} and E. Macii and M.
Poncino and R. Scarsi",
title = "Symbolic synthesis of clock-gating logic for power
optimization of synchronous controllers",
journal = j-TODAES,
volume = "4",
number = "4",
pages = "351--375",
month = oct,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p351-benini/p351-benini.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p351-benini/",
abstract = "Recent results have shown that dynamic power
management is effective in reducing the total power
consumption of sequential circuits. In this paper, we
propose a bottom-up approach for the automatic
extraction and synthesis of dynamic power management
circuitry starting from structural logic-level
specifications. Our techniques leverage the compact
BDD-based representation of Boolean and pseudo-Boolean
functions to detect idle conditions where the clock can
be stopped without compromising functional correctness.
Moreover, symbolic techniques allow accurate
probabilistic computations; in particular, they enable
the use of non-equiprobable primary input
distributions, a key step in the construction of models
that match the behavior of real hardware devices with a
high degree of fidelity. The results are encouraging,
since power savings of up to 34\% have been obtained on
standard benchmark circuits.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Logic Design --- Design Styles (B.6.1):
{\bf Sequential circuits}; Hardware --- Logic Design
--- Design Aids (B.6.3): {\bf Automatic synthesis};
Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
Optimization}",
}
@Article{Choi:1999:FDA,
author = "Kyumyung Choi and Steven P. Levitan",
title = "A flexible datapath allocation method for
architectural synthesis",
journal = j-TODAES,
volume = "4",
number = "4",
pages = "376--404",
month = oct,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p376-choi/p376-choi.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p376-choi/",
abstract = "We present a robust datapath allocation method that is
flexible enough to handle constraints imposed by a
variety of target architectures. Key features of this
method are its ability to handle accurate modeling of
datapath units and the simultaneous optimization of
direct objective functions. The proposed method
consists of a new binding model construction scheme and
an optimization technique based on simulated annealing.
To illustrate the flexibility of this method, two
datapath allocation procedures have been developed for
two problem environments: (1) a procedure that
incorporates interconnection area and delay estimates,
where floor-planning is tightly integrated into
datapath allocation; and (2) a procedure that handles
registers, register files, and multiport memories for
data storage, as well as random and linear topologies
for interconnection architectures. Results from these
two applications show our method produces competitive
designs for benchmark circuits, as well as being
flexible enough to be used for a variety of different
domains.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "allocation and binding; high-level synthesis",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2): {\bf Automatic synthesis};
Hardware --- Register-Transfer-Level Implementation ---
Design Aids (B.5.2): {\bf Optimization}; Hardware ---
Integrated Circuits --- Design Aids (B.7.2): {\bf
Placement and routing}; Mathematics of Computing ---
Numerical Analysis --- Optimization (G.1.6); Computer
Applications --- Computer-Aided Engineering (J.6): {\bf
Computer-aided design (CAD)}",
}
@Article{Hong:1999:POU,
author = "Inki Hong and Miodrag Potkonjak and Ramesh Karri",
title = "Power optimization using divide-and-conquer techniques
for minimization of the number of operations",
journal = j-TODAES,
volume = "4",
number = "4",
pages = "405--429",
month = oct,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p405-hong/p405-hong.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p405-hong/",
abstract = "We introduce an approach for power optimization using
a set of compilation and architectural techniques. The
key technical innovation is a novel divide-and-conquer
compilation technique to minimize the number of
operations for general computations. Our technique
optimizes not only a significantly wider set of
computations than the previously published techniques,
but also outperforms (or performs at least as well as
other techniques) on all examples. Along the
architectural dimension, we investigate coordinated
impact of compilation techniques on the number of
processors which provide optimal trade-off between cost
and power. We demonstrate that proper compilation
techniques can significantly reduce power with bounded
hardware cost. The effectiveness of all techniques and
algorithms is documented on numerous real-life
designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "code generation; transformations",
subject = "Software --- Programming Languages --- Processors
(D.3.4): {\bf Compilers}; Software --- Programming
Languages --- Processors (D.3.4): {\bf Optimization}",
}
@Article{Potkonjak:1999:MAD,
author = "Miodrag Potkonjak and Wayne Wolf",
title = "A methodology and algorithms for the design of hard
real-time multitasking {ASICs}",
journal = j-TODAES,
volume = "4",
number = "4",
pages = "430--459",
month = oct,
year = "1999",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1999-4-4/p430-potkonjak/p430-potkonjak.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1999-4-4/p430-potkonjak/",
abstract = "Traditional high-level synthesis concentrates on the
implementation of a single task (e.g. filter, linear
controller, A/D converter). However, many
applications---multifunctional embedded controllers,
intelligent wireless end-points, and DSP and multimedia
servers---are defined as sets of several computational
tasks. This paper describes new techniques for the
synthesis of ASIC implementations that realize multiple
computational processes under hard real-time
constraints. Our synthesis methodology establishes
connections between two important computer engineering
domains: operating systems and behavioral synthesis.
Our hierarchical approach starts from an
incompletely-specified preliminary solution and uses,
interchangeably, operating system and behavioral
synthesis techniques to derive increasingly more
detailed and accurate design solutions. We have
experimented with both optimal and heuristic algorithms
to implement this methodology. The optimal algorithm
uses several heuristics to speed up the average run
time of an exhaustive branch-and-bound search.
Force-directed optimization is the core of the
heuristic synthesis method. Analysis of the proposed
algorithms and the experiments shows that matching the
number of bits and type of operations in tasks
assigned to the same application-specific processor was
the most important factor in obtaining area-efficient
designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Integrated Circuits --- Types and Design
Styles (B.7.1): {\bf Algorithms implemented in
hardware}",
}
@Article{DosSantos:2000:CMP,
author = "Luiz C. V. {Dos Santos} and M. J. M. Heijligers and C.
A. J. {Van Eijk} and J. {Van Eijndhoven} and J. A. G.
Jess",
title = "A code-motion pruning technique for global
scheduling",
journal = j-TODAES,
volume = "5",
number = "1",
pages = "1--33",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 09:50:12 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p1-dos_santos/p1-dos_santos.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p1-dos_santos/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Fang:2000:MFP,
  author =       "Wen-Jong Fang and Allen C.-H. Wu",
  title =        "Multiway {FPGA} partitioning by fully exploiting
                 design hierarchy",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "34--50",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p34-fang/p34-fang.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p34-fang/",
  abstract =     "In this paper, we present a new integrated synthesis
                 and partitioning method for multiple-FPGA applications.
                 Our approach bridges the gap between HDL synthesis and
                 physical partitioning by fully exploiting the design
                 hierarchy. We propose a novel multiple-FPGA synthesis
                 and partitioning method which is performed in three
                 phases: (1) fine-grained synthesis, (2)
                 functional-based clustering, and (3) hierarchical
                 set-covering partitioning. This method first
                 synthesizes a design specification in a fine-grained
                 way so that functional clusters can be preserved based
                 on the structural nature of the design specification.
                 Then, it applies a hierarchical set-covering
                 partitioning method to form the final FPGA partitions.
                 Experimental results on a number of benchmarks and
                 industrial designs demonstrate that IO limits are the
                 bottleneck for CLB utilization when applying a
                 traditional multiple-FPGA synthesis method on flattened
                 netlists. In contrast, by fully exploiting the design
                 structural hierarchy during the multiple-FPGA
                 partitioning, our proposed method produces fewer FPGA
                 partitions with higher CLB and lower IO-pin
                 utilizations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  generalterms = "Design; Experimentation; Performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "fine-grained synthesis; functional clustering;
                 multi-way partitioning; multiple-FPGA synthesis",
  subject =      "Hardware --- Integrated Circuits --- Types and Design
                 Styles (B.7.1): {\bf Gate arrays}; Hardware ---
                 Integrated Circuits --- Design Aids (B.7.2)",
}
@Article{Hsiung:2000:CCM,
author = "Pao-Ann Hsiung",
title = "{CMAPS}: a cosynthesis methodology for
application-oriented parallel systems",
journal = j-TODAES,
volume = "5",
number = "1",
pages = "51--81",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p51-hsiung/p51-hsiung.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p51-hsiung/",
abstract = "Currently, a lot of research is devoted to {\em system
design}, and little work is done on {\em requirements
analysis}. Besides going from specification to design,
one of our main objectives is to show how an
application problem can be transformed into
specifications. Working from the hardware-software
codesign perspective, a system is designed starting
from an application problem itself, rather than the
detailed behavioral specifications. Given an
application problem specified as a directed acyclic
graph of elementary problems, a hardware-software
solution is derived such that the synthesized software,
a parallel pseudoprogram, can be scheduled and executed
on the synthesized hardware, a set of system-level
parallel computer specifications, with heuristically
optimal performance.
This is known as system-level cosynthesis of
application-oriented general-purpose parallel systems
for which a novel methodology called {\em Cosynthesis
Methodology for Application-Oriented Parallel
Systems\/} (CMAPS), is presented. Since parallel
programs and multiprocessor architectures are largely
interdependent, CMAPS explores the relationship between
hardware designs and software algorithms by
interleaving the modeling phases and the synthesis
phases of both hardware and software. High scalability
in terms of problem complexity and easy upgradability
to new technologies are achieved through modularization
of the input problem specification, of the software
algorithms, and of the hardware subsystem models. The
work presented in this paper will be beneficial to
designers of general-purpose parallel computer systems
which must be oriented toward solving some
user-specified problem such as the global controller of
an industry automation process or a multiprocessor
video server. Some application examples are given to
illustrate various codesign phases of CMAPS and its
feasibility.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "application-oriented general-purpose multiprocessors;
hardware-software modeling and cosynthesis;
requirements analysis",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6): {\bf Computer-aided design (CAD)}; Computer
Systems Organization --- General (C.0): {\bf System
architectures}; Computer Systems Organization ---
General (C.0): {\bf Systems specification methodology};
Computer Systems Organization --- Processor
Architectures --- Multiple Data Stream Architectures
(Multiprocessors) (C.1.2); Computer Systems
Organization --- Computer System Implementation ---
General (C.5.0); Computer Systems Organization ---
Processor Architectures --- Parallel Architectures
(C.1.4)",
}
@Article{Mehta:2000:UFR,
author = "Dinesh P. Mehta and Naveed Sherwani",
title = "On the use of flexible, rectilinear blocks to obtain
minimum-area floorplans in mixed block and cell
designs",
journal = j-TODAES,
volume = "5",
number = "1",
pages = "82--97",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p82-mehta/p82-mehta.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p82-mehta/",
abstract = "This paper presents three minimum-area floorplanning
algorithms that use flexible arbitrary rectilinear
shapes for the standard cell regions in MBC design. The
first algorithm (pure HCST) introduces a grid traversal
technique which guarantees a minimum-area floorplan.
The second algorithm (Hybrid-BF) uses a combination of
HCST and Breadth First (BF) traversals to give a
practical solution that approximately places flexible
blocks at specified locations called {\em seeds}. The
third algorithm (Hybrid-MBF) improves on the shapes of
the flexible blocks generated by Hybrid-BF by using a
combination of HCST and a Modified Breadth First (MBF)
traversal. All three algorithms are polynomial in the
number of grid squares. Optimized implementations of
Hybrid-BF and Hybrid-MBF required less than two seconds
on a SUN SPARCstation 10.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "floorplanning; mixed block and cell designs;
rectilinear polygons",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Layout}; Theory of Computation ---
Analysis of Algorithms and Problem Complexity ---
Nonnumerical Algorithms and Problems (F.2.2): {\bf
Routing and layout}; Mathematics of Computing ---
Discrete Mathematics --- Graph Theory (G.2.2): {\bf
Graph algorithms}",
}
@Article{Sapatnekar:2000:PDO,
  author =       "Sachin S. Sapatnekar and Weitong Chuang",
  title =        "Power-delay optimizations in gate sizing",
  journal =      j-TODAES,
  volume =       "5",
  number =       "1",
  pages =        "98--114",
  month =        jan,
  year =         "2000",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 27 10:05:33 MDT 2001",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "http://www.acm.org/pubs/articles/journals/todaes/2000-5-1/p98-sapatnekar/p98-sapatnekar.pdf;
                 http://www.acm.org/pubs/citations/journals/todaes/2000-5-1/p98-sapatnekar/",
  abstract =     "The problem of power-delay tradeoffs in transistor
                 sizing is examined using a nonlinear optimization
                 formulation. Both the dynamic and the short-circuit
                 power are considered, and a new modeling technique is
                 used to calculate the short-circuit power. The notion
                 of transition density is used, with an enhancement that
                 considers the effect of gate delays on the transition
                 density. When the short-circuit power is neglected, the
                 minimum power circuit is identical to the minimum area
                 circuit. However, under our more realistic models, our
                 experimental results on several circuits show that the
                 minimum power circuit is not necessarily the same as
                 the minimum area circuit.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "optimization; power estimation; transistor sizing;
                 VLSI layout",
  subject =      "Hardware --- Integrated Circuits --- Design Aids
                 (B.7.2): {\bf Layout}",
}
@Article{Benini:2000:SLPa,
author = "Luca Benini and Giovanni {De Micheli}",
title = "System-level power optimization: techniques and
tools",
journal = j-TODAES,
volume = "5",
number = "2",
pages = "115--192",
month = apr,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p115-benini/p115-benini.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p115-benini/",
abstract = "This tutorial surveys design methods for
energy-efficient system-level design. We consider
electronic systems consisting of a hardware platform
and software layers. We consider the three major
constituents of hardware that consume energy, namely
computation, communication, and storage units, and we
review methods of reducing their energy consumption. We
also study models for analyzing the energy cost of
software, and methods for energy-efficient software
design and compilation. This survey is organized around
three main phases of a system design: conceptualization
and modeling, design and implementation, and runtime
management. For each phase, we review recent techniques
for energy-efficient design of both hardware and
software.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Integrated Circuits --- Design Aids
(B.7.2); Hardware --- Performance and Reliability ---
Performance Analysis and Design Aids (B.8.2); Computer
Systems Organization --- Processor Architectures ---
General (C.1.0); Software --- Software Engineering ---
Design Tools and Techniques (D.2.2)",
}
@Article{Cong:2000:SGD,
author = "Jason Cong and Yean-Yow Hwang",
title = "Structural gate decomposition for depth-optimal
technology mapping in {LUT-based} {FPGA} designs",
journal = j-TODAES,
volume = "5",
number = "2",
pages = "193--225",
month = apr,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p193-cong/p193-cong.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p193-cong/",
abstract = "In this paper we study structural gate decomposition
in general, simple gate networks for depth-optimal
technology mapping using $K$-input Lookup-Tables
($K$-LUTs). We show that (1) structural gate
decomposition in any $K$-bounded network results in an
optimal mapping depth smaller than or equal to that of
the original network, regardless of the decomposition
method used; and (2) the problem of structural gate
decomposition for depth-optimal technology mapping is
NP-hard for $K$-unbounded networks when $ K \geq 3$ and
remains NP-hard for $K$-bounded networks when $ K \geq
5$. Based on these results, we propose two new
structural gate decomposition algorithms, named {\tt
DOGMA} and {\tt DOGMA-m}, which combine the
level-driven node-packing technique (used in FlowMap)
and the network flow-based labeling technique (used in
{\tt Chortle-d}) for depth-optimal technology mapping.
Experimental results show that (1) among five
structural gate decomposition algorithms, {\tt DOGMA-m}
results in the best mapping solutions; and (2) compared
with {\tt speed\_up} (an algebraic algorithm) and {\tt
TOS} (a Boolean approach), {\tt DOGMA-m} completes
decomposition of all tested benchmarks in a short time
while {\tt speed\_up} and {\tt TOS} fail in several
cases. However, {\tt speed\_up} results in the smallest
depth and area in the following technology mapping
steps.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "computer-aided design of VLSI; decomposition; delay
minimization; FPGA; logic optimization; programmable
logic; simplification; synthesis; system design;
technology mapping",
subject = "Hardware --- Logic Design --- Design Styles (B.6.1);
Hardware --- Logic Design --- Design Aids (B.6.3);
Hardware --- Logic Design --- Design Aids (B.6.3): {\bf
Automatic synthesis}; Hardware --- Integrated Circuits
--- Types and Design Styles (B.7.1)",
}
@Article{Hwang:2000:PSS,
author = "Chi-Hong Hwang and Allen C.-H. Wu",
title = "A predictive system shutdown method for energy saving
of event-driven computation",
journal = j-TODAES,
volume = "5",
number = "2",
pages = "226--241",
month = apr,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p226-hwang/p226-hwang.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p226-hwang/",
abstract = "This paper presents a system-level power management
technique for energy savings of event-driven
application. We present a new predictive
system-shutdown method to exploit sleep mode operations
for energy saving. We use an exponential-average
approach to predict the upcoming idle period. We
introduce two mechanisms, prediction-miss correction
and prewake-up, to improve the hit ratio and to reduce
the delay overhead. Experiments on four different
event-driven applications show that our proposed method
achieves high hit ratios in a wide range of delay
overheads, which results in a high degree of energy
saving with low delay penalties.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "event-driven; power management; predictive; sleep mode;
system shutdown",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6)",
}
@Article{Sudarsanam:2000:SRA,
author = "Ashok Sudarsanam and Sharad Malik",
title = "Simultaneous reference allocation in code generation
for dual data memory bank {ASIPs}",
journal = j-TODAES,
volume = "5",
number = "2",
pages = "242--264",
month = apr,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-2/p242-sudarsanam/p242-sudarsanam.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-2/p242-sudarsanam/",
abstract = "We address the problem of code generation for DSP
systems on a chip. In such systems, the amount of
silicon devoted to program ROM is limited, so
application software must be sufficiently dense.
Additionally, the software must be written so as to
meet various high-performance constraints, which may
include hard real-time constraints. Unfortunately,
current compiler technology is unable to generate
high-quality code for DSPs, whose architectures are
highly irregular. Thus, designers often resort to
programming application software in assembly---a
time-consuming task. In this paper, we focus on
providing support for an architectural feature of DSPs
that makes code generation difficult, namely multiple
data memory banks. This feature increases memory
bandwidth by permitting multiple data memory accesses
to occur in parallel when the referenced variables
belong to different data memory banks and the registers
involved conform to a strict set of conditions. We
present an algorithm that attempts to maximize the
benefit of this architectural feature. While previous
approaches have decoupled the phases of register
allocation and memory bank assignment, thereby
compromising code quality, our algorithm performs these
two phases simultaneously. Experimental results
demonstrate that our algorithm not only generates
high-quality compiled code, but also improves the
quality of completely-referenced code.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "code generation; code optimization; graph labelling;
memory bank assignment; register allocation",
subject = "Software --- Programming Languages --- Processors
(D.3.4); Software --- Programming Languages ---
Processors (D.3.4): {\bf Code generation}; Software ---
Programming Languages --- Processors (D.3.4): {\bf
Compilers}; Software --- Programming Languages ---
Processors (D.3.4): {\bf Optimization}",
}
@Article{Irwin:2000:E,
author = "Mary Jane Irwin",
title = "Editorial",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "265--266",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p265-irwin/p265-irwin.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p265-irwin/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bahar:2000:POT,
author = "R. Iris Bahar and Ernest T. Lampe and Enrico Macii",
title = "Power optimization of technology-dependent circuits
based on symbolic computation of logic implications",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "267--293",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p267-bahar/p267-bahar.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p267-bahar/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "aids; automation; design synthesis; logic design",
subject = "Hardware --- Logic Design --- Design Styles (B.6.1):
{\bf Combinational logic}; Hardware --- Control
Structures and Microprogramming --- Microprogram Design
Aids (B.1.4): {\bf Optimization}; Hardware --- Logic
Design --- Design Aids (B.6.3): {\bf Optimization};
Hardware --- Performance and Reliability --- General
(B.8.0); Computer Applications --- Physical Sciences
and Engineering (J.2): {\bf Electronics}",
}
@Article{Balakrishnan:2000:AFS,
author = "M. Balakrishnan and Heman Khanna",
title = "Allocation of {FIFO} structures in {RTL} data paths",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "294--310",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p294-balakrishnan/p294-balakrishnan.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p294-balakrishnan/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "data path; FIFO; ILP; RTL; synthesis",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design (B.5.1); Mathematics of Computing ---
Probability and Statistics (G.3): {\bf Queueing
theory}",
}
@Article{Benini:2000:SLPb,
author = "L. Benini and G. {De Micheli}",
title = "Synthesis of low-power selectively-clocked systems
from high-level specification",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "311--321",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p311-benini/p311-benini.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p311-benini/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "algorithms; design; gated clock; high-level synthesis;
low power",
subject = "Hardware --- Control Structures and Microprogramming
--- Control Structure Performance Analysis and Design
Aids (B.1.2); Hardware --- Performance and Reliability
--- General (B.8.0); Theory of Computation ---
Computation by Abstract Devices --- Models of
Computation (F.1.1): {\bf Unbounded-action devices}",
}
@Article{Blythe:2000:EOD,
author = "Stephen A. Blythe and Robert A. Walker",
title = "Efficient optimal design space characterization
methodologies",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "322--336",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p322-blythe/p322-blythe.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p322-blythe/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "bounding; clock-length determination; design space
exploration; efficient searching; high-level synthesis;
module selection; scheduling",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6); Hardware --- Performance and Reliability ---
General (B.8.0); Computing Methodologies --- Simulation
and Modeling --- General (I.6.0); Computer Applications
--- Physical Sciences and Engineering (J.2): {\bf
Electronics}",
}
@Article{Bogliolo:2000:RBR,
author = "Alessandro Bogliolo and Luca Benini and Giovanni {De
Micheli}",
title = "Regression-based {RTL} power modeling",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "337--372",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p337-bogliolo/p337-bogliolo.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p337-bogliolo/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "adaptive characterization; functional macros;
regression models; RTL design; RTL power modeling",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2); Hardware --- Logic Design ---
Design Aids (B.6.3); Hardware --- Performance and
Reliability --- General (B.8.0); Computing Milieux ---
Management of Computing and Information Systems ---
Installation Management (K.6.2): {\bf Benchmarks}",
}
@Article{Bommu:2000:RBF,
author = "Surendra Bommu and Niall O'Neill and Maciej
Ciesielski",
title = "Retiming-based factorization for sequential logic
optimization",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "373--398",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p373-bommu/p373-bommu.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p373-bommu/",
abstract = "Current sequential optimization techniques apply a
variety of logic transformations that mainly target the
combinational logic component of the circuit. Retiming
is typically applied as a postprocessing step to the
gate-level implementation obtained after technology
mapping. This paper introduces a new sequential logic
transformation which integrates retiming with logic
transformations at the technology-independent level.
This transformation is based on implicit retiming
across logic blocks and fanout stems during logic
optimization. Its application to sequential network
synthesis results in the optimization of logic across
register boundaries. It can be used in conjunction with
any measure of circuit quality for which a fast and
reliable gain estimation method can be obtained. We
implemented our new technique within the SIS framework
and demonstrated its effectiveness in terms of
cycle-time minimization on a set of sequential benchmark
circuits.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "finite state machines; retiming; sequential synthesis",
subject = "Hardware --- General (B.0); Hardware --- Logic Design
(B.6)",
}
@Article{Carchiolo:2000:HSS,
author = "Vincenza Carchiolo and Michele Malgeri and Giuseppe
Mangioni",
title = "Hardware\slash software synthesis of formal
specifications in codesign of embedded systems",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "399--432",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p399-carchiolo/p399-carchiolo.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p399-carchiolo/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "codesign; embedded system; hardware and software
synthesis",
subject = "Hardware --- Register-Transfer-Level Implementation
--- Design Aids (B.5.2); Computer Systems Organization
--- Special-Purpose and Application-Based Systems
(C.3): {\bf Real-time and embedded systems}; Computer
Systems Organization --- General (C.0); Software ---
Software Engineering --- Requirements/Specifications
(D.2.1); Theory of Computation --- Mathematical Logic
and Formal Languages --- Formal Languages (F.4.3)",
}
@Article{Chang:2000:TDR,
author = "Yao-Wen Chang and Kai Zhu and D. F. Wong",
title = "Timing-driven routing for symmetrical array-based
{FPGAs}",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "433--450",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p433-chang/p433-chang.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p433-chang/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "computer-aided design of VLSI; field-programmable gate
array; layout; synthesis",
subject = "Hardware --- Integrated Circuits --- Types and Design
Styles (B.7.1): {\bf Gate arrays}; Theory of
Computation --- Analysis of Algorithms and Problem
Complexity --- Nonnumerical Algorithms and Problems
(F.2.2): {\bf Routing and layout}; Hardware ---
Integrated Circuits --- Design Aids (B.7.2): {\bf
Placement and routing}; Computer Applications ---
Computer-Aided Engineering (J.6)",
}
@Article{Gelosh:2000:MLT,
author = "Donald S. Gelosh and Dorothy E. Setliff",
title = "Modeling layout tools to derive forward estimates of
area and delay at the {RTL} level",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "451--491",
month = jul,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p451-gelosh/p451-gelosh.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p451-gelosh/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "estimation; estimation techniques; layout; machine
learning; VLSI CAD",
subject = "Hardware --- Input/Output and Data Communications ---
Performance Analysis and Design Aids** (B.4.4);
Hardware --- Register-Transfer-Level Implementation ---
Design Aids (B.5.2): {\bf Automatic synthesis};
Computer Applications --- Computer-Aided Engineering
(J.6); Hardware --- Integrated Circuits --- Types and
Design Styles (B.7.1): {\bf VLSI (very large scale
integration)}; Computing Methodologies --- Artificial
Intelligence --- Learning (I.2.6): {\bf Concept
learning}; Computing Methodologies --- Simulation and
Modeling --- Simulation Output Analysis (I.6.6)",
}
@Article{Gogniat:2000:CBE,
author = "G. Gogniat and M. Auguin and L. Bianco and A.
Pegatoquet",
title = "A codesign back-end approach for embedded system
design",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "492--509",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p492-gogniat/p492-gogniat.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p492-gogniat/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "codesign; communications synthesis; HW/SW
integration; template architecture",
subject = "Computer Systems Organization --- Special-Purpose and
Application-Based Systems (C.3): {\bf Real-time and
embedded systems}; Computer Applications ---
Computer-Aided Engineering (J.6); Hardware ---
Integrated Circuits --- Types and Design Styles
(B.7.1): {\bf Advanced technologies}",
}
@Article{Gupta:2000:CIP,
author = "Avaneendra Gupta and John P. Hayes",
title = "{CLIP}: integer-programming-based optimal layout
synthesis of {$2$D CMOS} cells",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "510--547",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p510-gupta/p510-gupta.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p510-gupta/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "circuit clustering; CMOS networks; diffusion sharing;
integer linear programming; integer programming; layout
optimization; leaf cell synthesis; module generation;
transistor chains; two-dimensional layout",
subject = "Hardware --- Integrated Circuits --- Types and Design
Styles (B.7.1): {\bf Memory technologies}; Hardware ---
Integrated Circuits --- Design Aids (B.7.2): {\bf
Layout}; Hardware --- Integrated Circuits --- Design
Aids (B.7.2): {\bf Simulation}; Mathematics of
Computing --- Numerical Analysis --- Optimization
(G.1.6): {\bf Integer programming}; Software ---
Programming Languages --- Language Classifications
(D.3.2): {\bf Specialized application languages};
Computer Applications --- Computer-Aided Engineering
(J.6)",
}
@Article{Hsiao:2000:DST,
author = "Michael S. Hsiao and Elizabeth M. Rudnick and Janak H.
Patel",
title = "Dynamic state traversal for sequential circuit test
generation",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "548--565",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p548-hsiao/p548-hsiao.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p548-hsiao/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "automatic test pattern generation (ATPG);
finite-state-machine traversal; genetic algorithms;
sequential circuits; simulation-based; testing",
subject = "Hardware --- Performance and Reliability ---
Reliability, Testing, and Fault-Tolerance (B.8.1);
Hardware --- Logic Design --- Design Styles (B.6.1):
{\bf Sequential circuits}; Computer Applications ---
Computer-Aided Engineering (J.6); Computing
Methodologies --- Artificial Intelligence --- Problem
Solving, Control Methods, and Search (I.2.8): {\bf
Heuristic methods}",
}
@Article{Jha:2000:HLL,
author = "Pradip K. Jha and Nikil D. Dutt",
title = "High-level library mapping for memories",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "566--603",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p566-jha/p566-jha.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p566-jha/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "high-level synthesis; memory libraries;
technology-mapping",
subject = "Hardware --- Memory Structures --- Design Styles
(B.3.2): {\bf Primary memory}; Hardware ---
Register-Transfer-Level Implementation --- Design
(B.5.1): {\bf Memory design}; Computer Applications ---
Computer-Aided Engineering (J.6); Hardware ---
Register-Transfer-Level Implementation --- Design Aids
(B.5.2): {\bf Automatic synthesis}",
}
@Article{Lalgudi:2000:OCE,
author = "Kumar N. Lalgudi and Marios C. Papaefthymiou and
Miodrag Potkonjak",
title = "Optimizing computations for effective
block-processing",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "604--630",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p604-lalgudi/p604-lalgudi.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p604-lalgudi/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "combinatorial optimization; computation dataflow
graphs; embedded systems; high-level synthesis; integer
linear programming; retiming; scheduling;
vectorization",
subject = "Computer Systems Organization --- Special-Purpose and
Application-Based Systems (C.3): {\bf Signal processing
systems}; Computing Methodologies --- Pattern
Recognition --- Applications (I.5.4): {\bf Signal
processing}; Mathematics of Computing --- Numerical
Analysis --- Optimization (G.1.6): {\bf Integer
programming}; Mathematics of Computing --- Discrete
Mathematics --- General (G.2.0); Theory of Computation
--- Analysis of Algorithms and Problem Complexity ---
Nonnumerical Algorithms and Problems (F.2.2): {\bf
Sequencing and scheduling}; Computing Methodologies ---
Artificial Intelligence --- Problem Solving, Control
Methods, and Search (I.2.8): {\bf Scheduling}; Computer
Applications --- Computer-Aided Engineering (J.6)",
}
@Article{Long:2000:FFA,
author = "David E. Long and Mahesh A. Iyer and Miron
Abramovici",
title = "{FILL} and {FUNI}: algorithms to identify illegal
states and sequentially untestable faults",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "631--657",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p631-long/p631-long.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p631-long/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "automatic test generation; illegal states; sequential
circuits; untestable faults",
subject = "Hardware --- Performance and Reliability --- General
(B.8.0); Hardware --- Control Structures and
Microprogramming --- General (B.1.0); Hardware ---
Arithmetic and Logic Structures --- General (B.2.0);
Computer Applications --- Computer-Aided Engineering
(J.6); Hardware --- Arithmetic and Logic Structures ---
High-Speed Arithmetic (B.2.4): {\bf Algorithms};
Hardware --- Logic Design --- Design Styles (B.6.1):
{\bf Sequential circuits}; Hardware --- Logic Design
--- Design Aids (B.6.3); Hardware --- Integrated
Circuits --- Types and Design Styles (B.7.1)",
}
@Article{Marculescu:2000:SSM,
author = "Diana Marculescu and Radu Marculescu and Massoud
Pedram",
title = "Stochastic sequential machine synthesis with
application to constrained sequence generation",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "658--681",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p658-marculescu/p658-marculescu.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p658-marculescu/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "algorithms; design; performance; theory",
subject = "Data --- Coding and Information Theory (E.4): {\bf
Data compaction and compression}; Computer Applications
--- Computer-Aided Engineering (J.6); Hardware ---
Logic Design --- Design Aids (B.6.3); Hardware ---
Integrated Circuits --- Types and Design Styles
(B.7.1): {\bf VLSI (very large scale integration)};
Hardware --- Performance and Reliability --- General
(B.8.0); Theory of Computation --- Computation by
Abstract Devices --- Models of Computation (F.1.1);
Mathematics of Computing --- Probability and Statistics
(G.3): {\bf Stochastic processes}",
}
@Article{Panda:2000:CVC,
author = "Preeti Ranjan Panda and Nikil D. Dutt and Alexandru
Nicolau",
title = "On-chip vs. off-chip memory: the data partitioning
problem in embedded processor-based systems",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "682--704",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p682-panda/p682-panda.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p682-panda/",
abstract = "Efficient utilization of on-chip memory space is
extremely important in modern embedded system
applications based on processor cores. In addition to a
data cache that interfaces with slower off-chip memory,
a fast on-chip SRAM, called Scratch-Pad memory, is
often used in several applications, so that critical
data can be stored there with a guaranteed fast access
time. We present a technique for efficiently exploiting
on-chip Scratch-Pad memory by partitioning the
application's scalar and arrayed variables into
off-chip DRAM and on-chip Scratch-Pad SRAM, with the
goal of minimizing the total execution time of embedded
applications. We also present extensions of our
proposed memory assignment strategy to handle context
switching between multiple programs, as well as a
generalized memory hierarchy. Our experiments on code
kernels from typical applications show that our
technique results in significant performance
improvements.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Measurement; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "data cache; data partitioning; memory synthesis;
on-chip memory; scratch-pad memory; system design;
system synthesis",
subject = "Hardware --- Memory Structures --- Design Styles
(B.3.2): {\bf Cache memories}; Software --- Programming
Languages --- Processors (D.3.4): {\bf Compilers}",
}
@Article{Raimi:2000:EML,
author = "Richard Raimi and Ramin Hojati and Kedar S. Namjoshi",
title = "Environment modeling and language universality",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "705--725",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p705-raimi/p705-raimi.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p705-raimi/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "abstraction; environment modeling; language
universality; model checking",
subject = "Hardware --- Performance and Reliability ---
Reliability, Testing, and Fault-Tolerance (B.8.1);
Computer Systems Organization --- Performance of
Systems (C.4); Computer Applications --- Computer-Aided
Engineering (J.6); Theory of Computation ---
Computation by Abstract Devices --- Models of
Computation (F.1.1): {\bf Automata}; Software ---
Software Engineering --- Software/Program Verification
(D.2.4): {\bf Model checking}; Theory of Computation
--- Computation by Abstract Devices --- Models of
Computation (F.1.1): {\bf Unbounded-action devices}",
}
@Article{Yan:2000:TLB,
author = "Jin-Tai Yan",
title = "Three-layer bubble-sorting-based {nonManhattan}
channel routing",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "726--734",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p726-yan/p726-yan.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p726-yan/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "bubble-sorting algorithm; channel routing; three-layer
nonManhattan routing model",
subject = "Theory of Computation --- Analysis of Algorithms and
Problem Complexity --- Nonnumerical Algorithms and
Problems (F.2.2): {\bf Routing and layout}; Hardware
--- Integrated Circuits --- Design Aids (B.7.2): {\bf
Placement and routing}; Hardware --- Integrated
Circuits --- Design Aids (B.7.2): {\bf Verification};
Hardware --- Performance and Reliability --- General
(B.8.0); Computer Applications --- Computer-Aided
Engineering (J.6); Hardware --- Input/Output and Data
Communications --- Input/Output Devices (B.4.2): {\bf
Channels and controllers}",
}
@Article{Yang:2000:ERC,
author = "Cheng-Hsing Yang and Sao-Jie Chen and Jan-Ming Ho and
Chia-Chun Tsai",
title = "Efficient routability check algorithms for segmented
channel routing",
journal = j-TODAES,
volume = "5",
number = "3",
pages = "735--747",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-3/p735-yang/p735-yang.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-3/p735-yang/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "field programmable gate arrays (FPGAs); routing;
segmented channel",
subject = "Hardware --- Input/Output and Data Communications ---
Input/Output Devices (B.4.2): {\bf Channels and
controllers}; Hardware --- Integrated Circuits ---
Types and Design Styles (B.7.1): {\bf Gate arrays};
Computer Applications --- Computer-Aided Engineering
(J.6); Hardware --- Integrated Circuits --- Design Aids
(B.7.2): {\bf Placement and routing}; Theory of
Computation --- Analysis of Algorithms and Problem
Complexity --- Nonnumerical Algorithms and Problems
(F.2.2): {\bf Routing and layout}",
}
@Article{Marwedel:2000:GE,
author = "Peter Marwedel",
title = "Guest {Editorial}",
journal = j-TODAES,
volume = "5",
number = "4",
pages = "749--751",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p749-marwedel/p749-marwedel.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p749-marwedel/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Computing Milieux --- Computers and Society ---
Organizational Impacts (K.4.3)",
}
@Article{Aditya:2000:CSM,
author = "Shail Aditya and Scott A. Mahlke and B. Ramakrishna
Rau",
title = "Code size minimization and retargetable assembly for
custom {EPIC} and {VLIW} instruction formats",
journal = j-TODAES,
volume = "5",
number = "4",
pages = "752--773",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p752-aditya/p752-aditya.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p752-aditya/",
abstract = "PICO is a fully automated system for designing the
architecture and the microarchitecture of VLIW and EPIC
processors. A serious concern with this class of
processors, due to their very long instructions, is
their code size. One focus of this paper is to describe
a series of code size minimization techniques used
within PICO, some of which are applied during the
automatic design of the instruction format, while
others are applied during program assembly. The design
of a retargetable assembler to support these techniques
also poses certain novel challenges, which constitute
the second focus of this paper. Contrary to widely held
perceptions, we demonstrate that it is entirely
possible to design VLIW and EPIC processors that are
capable of issuing large numbers of operations per
cycle, but whose code size is only moderately larger
than that for a sequential CISC processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation; Measurement",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "code size minimization; custom templates; design
automation; EPIC; instruction format design; noop
compression; retargetable assembly; VLIW",
subject = "Computer Systems Organization --- Processor
Architectures --- Single Data Stream Architectures
(C.1.1): {\bf RISC/CISC, VLIW architectures}; Software
--- Programming Languages --- Processors (D.3.4): {\bf
Code generation}; Software --- Programming Languages
--- Processors (D.3.4): {\bf Retargetable compilers};
Hardware --- Control Structures and Microprogramming
--- Control Structure Performance Analysis and Design
Aids (B.1.2)",
}
@Article{VanEijk:2000:CAC,
author = "Koen {Van Eijk} and Bart Mesman and Carlos A. Alba
Pinto and Qin Zhao and Marco Bekooij and Jef {Van
Meerbergen} and Jochen Jess",
title = "Constraint analysis for code generation: basic
techniques and applications in {FACTS}",
journal = j-TODAES,
volume = "5",
number = "4",
pages = "774--793",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 09:50:12 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p774-van_eijk/p774-van_eijk.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p774-van_eijk/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Leupers:2000:GBC,
author = "Rainer Leupers and Steven Bashford",
title = "Graph-based code selection techniques for embedded
processors",
journal = j-TODAES,
volume = "5",
number = "4",
pages = "794--814",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p794-leupers/p794-leupers.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p794-leupers/",
abstract = "Code selection is an important task in code generation
for programmable processors, where the goal is to find
an efficient mapping of machine-independent
intermediate code to processor-specific machine
instructions. Traditional approaches to code selection
are based on tree parsing which enables fast and
optimal code selection for intermediate code given as a
set of data-flow trees. While this approach is
generally useful in compilers for general-purpose
processors, it may lead to poor code quality in the
case of embedded processors. The reason is that the
special architectural features of embedded processors
require performing code selection on data-flow graphs,
which are a more general representation of intermediate
code. In this paper, we present data-flow graph-based
code selection techniques for two architectural
families of embedded processors: media processors with
support for SIMD instructions and fixed-point DSPs with
irregular data paths. Both techniques exploit the fact
that, in the area of embedded systems, high code
quality is a much more important goal than high
compilation speed. We demonstrate that certain
architectural features can only be utilized by
graph-based code selection, while in other cases this
approach leads to a significant increase in code
quality as compared to tree-based code selection.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Experimentation",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "code selection; data-flow graphs; embedded processors;
irregular data paths; SIMD instructions",
subject = "Software --- Programming Languages --- Processors
(D.3.4): {\bf Code generation}",
}
@Article{Pees:2000:RCS,
author = "Stefan Pees and Andreas Hoffmann and Heinrich Meyr",
title = "Retargetable compiled simulation of embedded
processors using a machine description language",
journal = j-TODAES,
volume = "5",
number = "4",
pages = "815--834",
month = jan,
year = "2000",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2000-5-4/p815-pees/p815-pees.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2000-5-4/p815-pees/",
abstract = "Fast processor simulators are needed for the software
development of embedded processors, for HW/SW
cosimulation systems, and for profiling and design of
application-specific processors. Such fast simulators
can be generated based on the machine description
language LISA. Using this language to model processor
architectures enables the generation of compiled
simulators on various abstraction levels, assemblers,
and compiler back ends. The article discusses the
requirements of software development tools on processor
models and presents the approach based on the LISA
language. Furthermore, the implementation of a
retargetable environment consisting of compiled
simulator, debugger, and assembler is presented.
Measurements for a verified, cycle-based LISA model of
the TI TMS320C62x DSP show that this approach
achieves between 37$ \times $ and 170$ \times $ higher
simulation speed compared to a commercial simulator
using a standard technique and the same accuracy
level.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Languages; Performance; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "compiled simulation; DSP processors; HW/SW
cosimulation; instruction set simulators; machine
description languages; processor modeling and
simulation; system-on-chip",
subject = "Computing Methodologies --- Simulation and Modeling
--- Model Development (I.6.5): {\bf Modeling
methodologies}; Computer Systems Organization ---
Special-Purpose and Application-Based Systems (C.3):
{\bf Real-time and embedded systems}; Hardware ---
Control Structures and Microprogramming --- Control
Structure Performance Analysis and Design Aids (B.1.2):
{\bf Simulation**}",
}
@Article{Bakshi:2001:PCH,
author = "Smita Bakshi and Daniel D. Gajski",
title = "Performance-constrained hierarchical pipelining for
behaviors, loops, and operations",
journal = j-TODAES,
volume = "6",
number = "1",
pages = "1--25",
month = jan,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 09:50:12 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p1-bakshi/",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chakrabarty:2001:OTA,
author = "Krishnendu Chakrabarty",
title = "Optimal test access architectures for
system-on-a-chip",
journal = j-TODAES,
volume = "6",
number = "1",
pages = "26--49",
month = jan,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p26-chakrabarty/p26-chakrabarty.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p26-chakrabarty/",
abstract = "Test access is a major problem for core-based
system-on-a-chip (SOC) designs. Since embedded cores in
an SOC are not directly accessible via chip inputs and
outputs, special access mechanisms are required to test
them at the system level. An efficient test access
architecture should also reduce test cost by minimizing
test application time. We address several issues
related to the design of optimal test access
architectures that minimize testing time, including
the assignment of cores to test buses, distribution of
test data width between multiple test buses, and
analysis of test data width required to satisfy an
upper bound on the testing time. Even though the
decision versions of all these problems are shown to be
NP-complete, they can be solved exactly for practical
instances using integer linear programming (ILP). As a
case study, the ILP models for two hypothetical but
nontrivial systems are solved using a public-domain ILP
software package.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Reliability",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Integrated Circuits --- Types and Design
Styles (B.7.1); Hardware --- Integrated Circuits ---
Design Aids (B.7.2); Hardware --- Integrated Circuits
--- Reliability and Testing** (B.7.3)",
}
@Article{Chen:2001:ALP,
author = "Rita Yu Chen and Mary Jane Irwin and Raminder S.
Bajwa",
title = "Architecture-level power estimation and design
experiments",
journal = j-TODAES,
volume = "6",
number = "1",
pages = "50--66",
month = jan,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p50-chen/p50-chen.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p50-chen/",
abstract = "Architecture-level power estimation has received more
attention recently because of its efficiency. This
article presents a technique used to do power analysis
of processors at the architecture level. It provides
cycle-by-cycle power consumption data of the
architecture on the basis of the instruction/data flow
stream. To characterize the power dissipation of
control units, a novel hierarchical method has been
developed. Using this technique, a power estimator is
implemented for a commercial processor. The accuracy of
the estimator is validated by comparing the power
values it produces against measurements made by a
gate-level power simulator for the same benchmark set.
Our estimation approach is shown to provide very
efficient and accurate power analysis at the
architecture level. The energy models built for
first-pass estimation (such as ALU, MAC unit, register
files) are reusable for future architecture design
modification. In this article, we demonstrate the
application of the technique. Furthermore, this
technique can evaluate various kinds of software to
achieve hardware/software codesign for low power.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "architecture tradeoff; architecture-level power
estimation; computer-aided design of VLSI; control
unit; energy model; energy table; functional unit;
hardware/software codesign; instruction format
transition; low power design; output signal transition;
power analysis and estimation; switch capacitance",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6)",
}
@Article{Hsiung:2001:PPO,
author = "Pao-Ann Hsiung",
title = "{POSE}: a parallel object-oriented synthesis
environment",
journal = j-TODAES,
volume = "6",
number = "1",
pages = "67--92",
month = jan,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p67-hsiung/p67-hsiung.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p67-hsiung/",
abstract = "Design automation tools and methodologies always
encounter a problem of how systems may be designed
efficiently, including issues such as static modeling
and dynamic manipulation of system parts. With the
rapid progress of design technology, the continuously
increasing number of different choices per system part
and the growing complexity of today's systems, the
efficiency of the design environment is not only a
major concern now, but will also be a demanding problem
in the near future. In contrast to heuristic methods, a
novel environment called POSE is proposed that
increases efficiency during design without losing
optimality in the final design results. System parts
are modeled using the popular object-oriented modeling
technique and are dynamically manipulated using the
parallel design technique. A complete integration of
object-oriented and parallel techniques is one of the
major features of POSE. Common problems related to
parallel design such as {\em emptiness\/} and {\em
deadlock\/} are also elegantly solved within POSE.
Experimental results and formal analysis based on POSE
all show its practical and theoretical usefulness. POSE
can be used at any level of synthesis as long as
off-the-shelf building-blocks manipulation is required.
POSE can be applied especially to {\em system-level\/}
synthesis, whose targets can be parallel computer
architectures, systems-on-chip, or embedded systems. We
will show how POSE has been applied to ICOS, a recently
proposed synthesis methodology. Furthermore, POSE can
be easily integrated with other heuristic design
methodologies to allow increased design efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design-completion check; hardware synthesis;
object-oriented technology; parallel design; synthesis
rollback",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6): {\bf Computer-aided design (CAD)}; Hardware ---
Miscellaneous (B.m): {\bf Design management}",
}
@Article{Huang:2001:CSP,
author = "Ing-Jer Huang",
title = "Co-synthesis of pipelined structures and instruction
reordering constraints for instruction set processors",
journal = j-TODAES,
volume = "6",
number = "1",
pages = "93--121",
month = jan,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p93-huang/p93-huang.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p93-huang/",
abstract = "This paper presents a hardware/software co-synthesis
approach to pipelined ISP (instruction set processor)
design. The approach synthesizes the pipeline structure
from a given instruction set architecture (behavioral)
specification. In addition, it generates a set of
reordering constraints that guides the compiler
back-end (reorderer) to properly schedule instructions
so that possible pipeline hazards are avoided and
throughput is improved. \par
Co-synthesis takes place while resolving pipeline
hazards, which can be attributed to inter-instruction
dependencies (IIDs). An extended taxonomy of IIDs has
been proposed for the systematic analysis of pipeline
hazards. Hardware/software methods are developed to
resolve IIDs. Algorithms based on taxonomy and
resolutions are constructed and integrated into the
pipeline synthesis process to explore hardware and
software design space. Application benchmarks are used
to evaluate possible designs and guide the design
decision. The power of the co-synthesis tool PIPER is
demonstrated through pipeline synthesis of one
illustrative example and two ISPs, including an
industrial one (TDY-43). In comparison with other
related approaches, our approach achieves higher
throughput and provides a systematic way to explore the
hardware/software trade-off.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "compiler instruction optimization; instruction set
processor; pipeline hazards; pipeline taxonomy;
synthesis",
subject = "Hardware --- Control Structures and Microprogramming
--- Control Structure Performance Analysis and Design
Aids (B.1.2): {\bf Automatic synthesis**}",
}
@Article{Mariatos:2001:MAC,
author = "E. P. Mariatos and A. N. Birbas and M. K. Birbas",
title = "A mapping algorithm for computer-assisted exploration
in the design of embedded systems",
journal = j-TODAES,
volume = "6",
number = "1",
pages = "122--147",
month = jan,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
note = "See note \cite{Chen:2007:NMA}.",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-1/p122-mariatos/p122-mariatos.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-1/p122-mariatos/",
abstract = "We present a technique for automatic exploration of
architectural alternatives in the design of complex
electronic embedded systems and systems-on-a-chip. The
technique transforms the problem into a set of simple
model-to-model operations and a mapping algorithm that
becomes the core of the entire design process. The
mapping algorithm is formulated as an assignment-type
problem (ATP), which is, in turn, solved by a
straightforward optimization method. The result is a
design assistance tool, which is demonstrated through a
telecommunication systems example.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Design; Experimentation",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "codesign; embedded system design space exploration;
specification mapping",
subject = "Computer Systems Organization --- Special-Purpose and
Application-Based Systems (C.3); Software --- Software
Engineering --- Design Tools and Techniques (D.2.2):
{\bf Computer-aided software engineering (CASE)}",
}
@Article{Panda:2001:DMO,
author = "P. R. Panda and F. Catthoor and N. D. Dutt and K.
Danckaert and E. Brockmeyer and C. Kulkarni and A.
Vandercappelle and P. G. Kjeldsberg",
title = "Data and memory optimization techniques for embedded
systems",
journal = j-TODAES,
volume = "6",
number = "2",
pages = "149--206",
month = apr,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p149-panda/p149-panda.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p149-panda/",
abstract = "We present a survey of the state-of-the-art techniques
used in performing data and memory-related
optimizations in embedded systems. The optimizations
are targeted directly or indirectly at the memory
subsystem, and impact one or more out of three
important cost metrics: area, performance, and power
dissipation of the resulting implementation. \par
We first examine architecture-independent optimizations
in the form of code transformations. We next cover a
broad spectrum of optimization techniques that address
memory architectures at varying levels of granularity,
ranging from register files to on-chip memory, data
caches, and dynamic memory (DRAM). We end with memory
addressing related issues.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "address generation; allocation; architecture
exploration; code transformation; data cache; data
optimization; DRAM; high-level synthesis; memory
architecture customization; memory power dissipation;
register file; size estimation; SRAM; survey",
subject = "Hardware --- Memory Structures --- General (B.3.0);
Hardware --- Register-Transfer-Level Implementation ---
Design (B.5.1): {\bf Memory design}; Hardware ---
Register-Transfer-Level Implementation --- Design Aids
(B.5.2): {\bf Optimization}; Hardware --- Integrated
Circuits --- Types and Design Styles (B.7.1): {\bf
Memory technologies}; Software --- Programming
Languages --- Processors (D.3.4): {\bf Optimization}",
}
@Article{Shenoy:2001:ASL,
author = "Nagaraj Shenoy and Alok Choudhary and Prithviraj
Banerjee",
title = "An algorithm for synthesis of large time-constrained
heterogeneous adaptive systems",
journal = j-TODAES,
volume = "6",
number = "2",
pages = "207--225",
month = apr,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p207-shenoy/p207-shenoy.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p207-shenoy/",
abstract = "Large time-constrained applications are highly
computer-intensive and are often implemented as a
complex organization of pipelined data parallel tasks
on a pool of embedded processors, DSP processors, and
FPGAs. The large number of design alternatives
available at each task level, the application as a
whole, and the special needs of the reconfigurable
devices (such as the FPGA) make the manual synthesis of
such systems very tedious. \par
The automatic synthesis algorithm in this paper
combines exact (MILP-based) and heuristic techniques to
solve this problem, which basically involves (1)
propagation of timing constraints; (2) pipelining the
loops to meet throughput requirements; (3) resource
selection and scheduling, keeping the processing
requirements and the timing constraints in view; (4)
scheduling the resources across the tasks to ensure
maximum utilization; and (5) hiding the reconfiguration
delays of the FPGAs. \par
While the use of MILP techniques helps in getting
high-quality results, combining them with heuristics
ensures acceptable synthesis times, striking a good
balance between quality of results and synthesis time.
Our experimental evaluation of the algorithm shows an
average 40\% in resource cost reduction (compared to
manual synthesis) with synthesis times from minutes to
as low as a few seconds in some cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Design; Experimentation",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "delay/cost table; hierarchical control data-flow
graph; list scheduling; mixed integer linear
programming; pipelining; reconfigurable computing;
time-constrained synthesis",
subject = "Computer Applications --- Computer-Aided Engineering
(J.6): {\bf Computer-aided design (CAD)}; Computer
Systems Organization --- Special-Purpose and
Application-Based Systems (C.3): {\bf Real-time and
embedded systems}",
}
@Article{Su:2001:IRA,
author = "Chauchin Su and Yue-Tsang Chen and Shyh-Jye Jou",
title = "Intrinsic response for analog module testing using an
analog testability bus",
journal = j-TODAES,
volume = "6",
number = "2",
pages = "226--243",
month = apr,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p226-su/p226-su.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p226-su/",
abstract = "A parasitic effect removal methodology is proposed to
handle the large parasitic effects in analog
testability buses. The removal is done by an on-chip
test generation technique and an intrinsic response
extraction algorithm. On-chip test generation creates
test signals on-chip to avoid the parasitic effects of
the test application bus. The intrinsic response
extraction cross-checks and cancels the parasitic
effects of both test application and response
observation paths. The tests using both SPICE
simulation and MNABST-1 P1149.4 test chip reveal that
the proposed algorithm can not only remove the
parasitic effects of the test buses but also tolerate
test signal variations. Furthermore, it is robust
enough to handle loud environmental noise and the
nonlinearity of the switching devices.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Experimentation; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "analog testability bus; analog testing; boundary scan;
design for testability; intrinsic response",
subject = "Hardware --- Performance and Reliability ---
Reliability, Testing, and Fault-Tolerance (B.8.1)",
}
@Article{Huang:2001:VSE,
author = "Shi-Yu Huang and Kwang-Ting Cheng and Kuang-Chien
Chen",
title = "Verifying sequential equivalence using {ATPG}
techniques",
journal = j-TODAES,
volume = "6",
number = "2",
pages = "244--275",
month = apr,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/2001-6-2/p244-huang/p244-huang.pdf;
http://www.acm.org/pubs/citations/journals/todaes/2001-6-2/p244-huang/",
abstract = "In this paper we address the problem of verifying the
equivalence of two sequential circuits.
State-of-the-art sequential optimization techniques
such as retiming and sequential redundancy removal can
handle designs with up to hundreds or even thousands of
flip-flops. However, the BDD-based approaches for
verifying sequential equivalence can easily run into
memory explosion for such designs. In an attempt to
handle larger circuits, we modify test
pattern-generation techniques for verification. The
suggested approach utilizes the popular efficient
backward-justification technique used in most
sequential ATPG programs. We present several techniques
to enhance the efficiency of this approach by (1)
identifying equivalent flip-flop pairs using an
induction-based algorithm, and (2) generalizing the
idea of exploring the structural similarity between
circuits to perform verification in stages. This
ATPG-based framework is suitable for verifying circuits
either with or without a reset state. In order to
extend this approach to verify retimed circuits, we
introduce a delay-compensation-based algorithm for
preprocessing the circuits. The experimental results of
verifying the correctness of circuits after sequential
redundancy removal and retiming with up to several
hundred flip-flops are presented.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3):
{\bf Verification}; Hardware --- Logic Design ---
Design Styles (B.6.1): {\bf Sequential circuits}",
}
@Article{VanPraet:2001:PMC,
author = "J. {Van Praet} and D. Lanneer and W. Geurts and G.
Goossens",
title = "Processor modeling and code selection for retargetable
compilation",
journal = j-TODAES,
volume = "6",
number = "3",
pages = "277--307",
month = jul,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:45 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kagaris:2001:NHC,
author = "D. Kagaris and S. Tragoudas",
title = "{Von Neumann} hybrid cellular automata for generating
deterministic test sequences",
journal = j-TODAES,
volume = "6",
number = "3",
pages = "308--321",
month = jul,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:45 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liao:2001:CPT,
author = "Swanwa Liao and Mario A. Lopez and Dinesh Mehta",
title = "Constrained polygon transformations for incremental
floorplanning",
journal = j-TODAES,
volume = "6",
number = "3",
pages = "322--342",
month = jul,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:45 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chu:2001:CFS,
author = "Chris Chu and D. F. Wong",
title = "Closed form solutions to simultaneous buffer
insertion\slash sizing and wire sizing",
journal = j-TODAES,
volume = "6",
number = "3",
pages = "343--371",
month = jul,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:45 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hu:2001:ELA,
author = "Xiaobo Sharon Hu and Danny Z. Chen and Rajeshkumar
Sambandam",
title = "Efficient list-approximation techniques for floorplan
area minimization",
journal = j-TODAES,
volume = "6",
number = "3",
pages = "372--400",
month = jul,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:45 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Nourani:2001:ITI,
author = "Mehrdad Nourani and Joan Carletta and Christos
Papachristou",
title = "Integrated test of interacting controllers and
datapaths",
journal = j-TODAES,
volume = "6",
number = "3",
pages = "401--422",
month = jul,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:45 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Parulkar:2001:IRC,
author = "Ishwar Parulkar and Sandeep K. Gupta and Melvin A.
Breuer",
title = "Introducing redundant computations in {RTL} data paths
for reducing {BIST} resources",
journal = j-TODAES,
volume = "6",
number = "3",
pages = "423--445",
month = jul,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:45 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dasgupta:2001:SRG,
author = "Parthasarathi Dasgupta and Susmita Sur-Kolay",
title = "Slicible rectangular graphs and their optimal
floorplans",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "447--470",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hartanto:2001:DSS,
author = "Ismed Hartanto and Srikanth Venkataraman and W. Kent
Fuchs and Elizabeth M. Rudnick and Janak H. Patel and
Sreejit Chakravarty",
title = "Diagnostic simulation of stuck-at faults in sequential
circuits using compact lists",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "471--489",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Narasimhan:2001:FAC,
author = "M. Narasimhan and J. Ramanujam",
title = "A fast approach to computing exact solutions to the
resource-constrained scheduling problem",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "490--500",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Karri:2001:IRT,
author = "Ramesh Karri and Balakrishnan Iyer",
title = "Introspection: a register transfer level technique for
concurrent error detection and diagnosis in data
dominated designs",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "501--515",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Boyer:2001:ODS,
author = "Fran{\c{c}}ois R. Boyer and El Mostapha Aboulhamid and
Yvon Savaria and Michel Boyer",
title = "Optimal design of synchronous circuits using software
pipelining techniques",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "516--532",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Voeten:2001:FLT,
author = "Jeroen Voeten",
title = "On the fundamental limitations of transformational
design",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "533--552",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Shiue:2001:DMD,
author = "Wen-Tsong Shiue and Sathishkumar Udayanarayanan and
Chaitali Chakrabarti",
title = "Data memory design and exploration for low-power
embedded systems",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "553--568",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ashar:2001:UCD,
author = "Pranav Ashar and Aarti Gupta and Sharad Malik",
title = "Using complete-$1$-distinguishability for {FSM}
equivalence checking",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "569--590",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2001:ODC,
author = "Tai-Hung Liu and Adnan Aziz and Vigyan Singhal",
title = "Optimizing designs containing black boxes",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "591--601",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Roop:2001:FST,
author = "Partha S. Roop and A. Sowmya and S. Ramesh",
title = "Forced simulation: a technique for automating
component reuse in embedded systems",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "602--628",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Flores:2001:ESM,
author = "Paulo F. Flores and Hor{\'a}cio C. Neto and Jo{\~a}o
P. Marques-Silva",
title = "An exact solution to the minimum size test pattern
problem",
journal = j-TODAES,
volume = "6",
number = "4",
pages = "629--644",
month = oct,
year = "2001",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Feb 19 14:35:44 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chowdhary:2002:GTM,
author = "Amit Chowdhary and John P. Hayes",
title = "General technology mapping for field-programmable gate
arrays based on lookup tables",
journal = j-TODAES,
volume = "7",
number = "1",
pages = "1--32",
month = jan,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 7 11:12:03 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Michael:2002:ATD,
author = "M. Michael and S. Tragoudas",
title = "{ATPG} tools for delay faults at the functional
level",
journal = j-TODAES,
volume = "7",
number = "1",
pages = "33--57",
month = jan,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 7 11:12:03 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lysecky:2002:PIB,
author = "Roman Lysecky and Frank Vahid",
title = "Prefetching for improved bus wrapper performance in
cores",
journal = j-TODAES,
volume = "7",
number = "1",
pages = "58--90",
month = jan,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 7 11:12:03 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dutt:2002:CAI,
author = "Shantanu Dutt and Wenyong Deng",
title = "Cluster-aware iterative improvement techniques for
partitioning large {VLSI} circuits",
journal = j-TODAES,
volume = "7",
number = "1",
pages = "91--121",
month = jan,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 7 11:12:03 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Goodby:2002:MSP,
author = "Laurence Goodby and Alex Orailo{\u{g}}lu and Paul M.
Chau",
title = "Microarchitectural synthesis of
performance-constrained, low-power {VLSI} designs",
journal = j-TODAES,
volume = "7",
number = "1",
pages = "122--136",
month = jan,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 7 11:12:03 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% The bibdate stamp includes the time-zone token (MST), matching the
%%% weekday/month/day/time/zone/year layout of the neighbouring entries.
@Article{GuerraeSilva:2002:SMA,
author = "Lu{\'\i}s {Guerra e Silva} and Jo{\~a}o Marques-Silva
and L. Miguel Silveira and Karem A. Sakallah",
title = "Satisfiability models and algorithms for circuit delay
computation",
journal = j-TODAES,
volume = "7",
number = "1",
pages = "137--158",
month = jan,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Oct 31 06:28:44 MST 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Darte:2002:CEL,
  author          = {Darte, Alain and Schreiber, Robert and Rau, B. Ramakrishna and Vivien, Fr{\'e}d{\'e}ric},
  title           = {Constructing and exploiting linear schedules with prescribed parallelism},
  journal         = j-TODAES,
  volume          = 7,
  number          = 1,
  pages           = {159--172},
  month           = jan,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Jagannathan:2002:FAC,
  author          = {Jagannathan, Ashok and Hur, Sung-Woo and Lillis, John},
  title           = {A fast algorithm for context-aware buffer insertion},
  journal         = j-TODAES,
  volume          = 7,
  number          = 1,
  pages           = {173--188},
  month           = jan,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Vemuri:2002:ERO,
  author          = {Vemuri, Ranga and Katkoori, Srinivas and Kaul, Meenakshi and Roy, Jay},
  title           = {An efficient register optimization algorithm for high-level synthesis from hierarchical behavioral specifications},
  journal         = j-TODAES,
  volume          = 7,
  number          = 1,
  pages           = {189--216},
  month           = jan,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Lin:2002:OTB,
  author          = {Lin, Shi-Zheng Eric and Changfan, Chieh and Hsu, Yu-Chin and Tsai, Fur-Shing},
  title           = {Optimal time borrowing analysis and timing budgeting optimization for latch-based designs},
  journal         = j-TODAES,
  volume          = 7,
  number          = 1,
  pages           = {217--230},
  month           = jan,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:03 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Dasgupta:2002:MBP,
  author          = {Dasgupta, Parthasarathi and Pan, Peichen and Nandy, Subhas C. and Bhattacharya, Bhargab B.},
  title           = {Monotone bipartitioning problem in a planar point set with applications to {VLSI}},
  journal         = j-TODAES,
  volume          = 7,
  number          = 2,
  pages           = {231--248},
  month           = apr,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% The fourth author's family name is the two-word Sonza Reorda; it is
%%% braced so that BibTeX keeps it together as the surname instead of
%%% reading Sonza as a given name.
@Article{Corno:2002:IAS,
author = "F. Corno and P. Prinetto and M. Rebaudengo and M.
{Sonza Reorda} and G. Squillero",
title = "Initializability analysis of synchronous sequential
circuits",
journal = j-TODAES,
volume = "7",
number = "2",
pages = "249--264",
month = apr,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 7 11:12:04 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2002:LTL,
  author          = {Kim, Ki-Wook and Kim, Taewhan and Hwang, Ting-Ting and Kang, Sung-Mo and Liu, C. L.},
  title           = {Logic transformation for low-power synthesis},
  journal         = j-TODAES,
  volume          = 7,
  number          = 2,
  pages           = {265--283},
  month           = apr,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Tessier:2002:FPA,
  author          = {Tessier, Russell},
  title           = {Fast placement approaches for {FPGAs}},
  journal         = j-TODAES,
  volume          = 7,
  number          = 2,
  pages           = {284--305},
  month           = apr,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Zhao:2002:TMA,
  author          = {Zhao, Min and Sapatnekar, Sachin S.},
  title           = {Technology mapping algorithms for domino logic},
  journal         = j-TODAES,
  volume          = 7,
  number          = 2,
  pages           = {306--335},
  month           = apr,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Araujo:2002:GAR,
  author          = {Araujo, Guido and Ottoni, Guilherme and Cintra, Marcelo},
  title           = {Global array reference allocation},
  journal         = j-TODAES,
  volume          = 7,
  number          = 2,
  pages           = {336--357},
  month           = apr,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% Author names are written in Last, First form.  BibTeX splits name
%%% tokens at hyphens, so in First Last order the lowercase parts of
%%% Chung-wen and Cheng-kok would be taken as von particles and the
%%% entry would be sorted and abbreviated incorrectly.
@Article{Tsao:2002:UDC,
author = "Tsao, Chung-wen Albert and Koh, Cheng-kok",
title = "{UST\slash DME}: a clock tree router for general skew
constraints",
journal = j-TODAES,
volume = "7",
number = "3",
pages = "359--379",
month = jul,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 7 11:12:04 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kountouris:2002:ESC,
  author          = {Kountouris, Apostolos A. and Wolinski, Christophe},
  title           = {Efficient scheduling of conditional behaviors for high-level synthesis},
  journal         = j-TODAES,
  volume          = 7,
  number          = 3,
  pages           = {380--412},
  month           = jul,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Vahid:2002:PSP,
  author          = {Vahid, Frank},
  title           = {Partitioning sequential programs for {CAD} using a three-step approach},
  journal         = j-TODAES,
  volume          = 7,
  number          = 3,
  pages           = {413--429},
  month           = jul,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% The third author's family name is the two-word De Veciana; it is
%%% braced so that BibTeX keeps it together as the surname instead of
%%% reading De as a given name.
@Article{Lapinskii:2002:CAH,
author = "Viktor S. Lapinskii and Margarida F. Jacome and
Gustavo A. {De Veciana}",
title = "Cluster assignment for high-performance embedded
{VLIW} processors",
journal = j-TODAES,
volume = "7",
number = "3",
pages = "430--454",
month = jul,
year = "2002",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 7 11:12:04 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Saxena:2002:ESL,
  author          = {Saxena, Vikram and Najm, Farid N. and Hajj, Ibrahim N.},
  title           = {Estimation of state line statistics in sequential circuits},
  journal         = j-TODAES,
  volume          = 7,
  number          = 3,
  pages           = {455--473},
  month           = jul,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Glebov:2002:FNA,
  author          = {Glebov, A. and Gavrilov, S. and Blaauw, D. and Zolotov, V.},
  title           = {False-noise analysis using logic implications},
  journal         = j-TODAES,
  volume          = 7,
  number          = 3,
  pages           = {474--498},
  month           = jul,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:04 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Sarrafzadeh:2002:GE,
  author          = {Sarrafzadeh, Majid and Jayaraman, Rajeev},
  title           = {Guest editorial},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {499--500},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Vemuri:2002:BBL,
  author          = {Vemuri, Navin and Kalla, Priyank and Tessier, Russell},
  title           = {{BDD}-based logic synthesis for {LUT}-based {FPGAs}},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {501--525},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Fan:2002:RDG,
  author          = {Fan, Hongbing and Liu, Jiping and Wu, Yu-Liang and Wong, C. K.},
  title           = {Reduction design for generic universal switch blocks},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {526--546},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Dandalis:2002:RTP,
  author          = {Dandalis, Andreas and Prasanna, Viktor K.},
  title           = {Run-time performance optimization of an {FPGA}-based deduction engine for {SAT} solvers},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {547--562},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Wang:2002:BSF,
  author          = {Wang, Haibo and Vrudhula, Sarma B. K.},
  title           = {Behavioral synthesis of field programmable analog array circuits},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {563--604},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Kastner:2002:IGH,
  author          = {Kastner, R. and Kaplan, A. and Memik, S. Ogrenci and Bozorgzadeh, E.},
  title           = {Instruction generation for hybrid reconfigurable systems},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {605--627},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Wu:2002:PDP,
  author          = {Wu, Guang-Ming and Lin, Jai-Ming and Chang, Yao-Wen},
  title           = {Performance-driven placement for dynamically reconfigurable {FPGAs}},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {628--642},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Singh:2002:ECC,
  author          = {Singh, Amit and Parthasarathy, Ganapathy and Marek-Sadowska, Ma{\l}gorzata},
  title           = {Efficient circuit clustering for area and power reduction in {FPGAs}},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {643--663},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Dutt:2002:SBB,
  author          = {Dutt, Shantanu and Verma, Vinay and Arslan, Hasan},
  title           = {A search-based bump-and-refit approach to incremental routing for {ECO} applications in {FPGAs}},
  journal         = j-TODAES,
  volume          = 7,
  number          = 4,
  pages           = {664--693},
  month           = oct,
  year            = 2002,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Tragoudas:2003:PDF,
  author          = {Tragoudas, S. and Denny, N.},
  title           = {Path delay fault testing using test points},
  journal         = j-TODAES,
  volume          = 8,
  number          = 1,
  pages           = {1--10},
  month           = jan,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Chang:2003:AFF,
  author          = {Chang, Yao-Wen and Zhu, Kai and Wu, Guang-Ming and Wong, D. F. and Wong, C. K.},
  title           = {Analysis of {FPGA\slash FPIC} switch modules},
  journal         = j-TODAES,
  volume          = 8,
  number          = 1,
  pages           = {11--37},
  month           = jan,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Jone:2003:DTI,
  author          = {Jone, W.-B. and Wang, J. S. and Lu, Hsueh-I and Hsu, I. P. and Chen, J.-Y.},
  title           = {Design theory and implementation for low-power segmented bus systems},
  journal         = j-TODAES,
  volume          = 8,
  number          = 1,
  pages           = {38--54},
  month           = jan,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Yao:2003:FRC,
  author          = {Yao, Bo and Chen, Hongyu and Cheng, Chung-Kuan and Graham, Ronald},
  title           = {Floorplan representations: {Complexity} and connections},
  journal         = j-TODAES,
  volume          = 8,
  number          = 1,
  pages           = {55--80},
  month           = jan,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Riepe:2003:TPN,
  author          = {Riepe, Michael A. and Sakallah, Karem A.},
  title           = {Transistor placement for noncomplementary digital {VLSI} cell synthesis},
  journal         = j-TODAES,
  volume          = 8,
  number          = 1,
  pages           = {81--107},
  month           = jan,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Blanton:2003:PIP,
  author          = {Blanton, R. D. (Shawn) and Hayes, John P.},
  title           = {On the properties of the input pattern fault model},
  journal         = j-TODAES,
  volume          = 8,
  number          = 1,
  pages           = {108--124},
  month           = jan,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{VanAchteren:2003:SSD,
  author          = {{Van Achteren}, Tanja and Catthoor, Francky and Lauwereins, Rudy and Deconinck, Geert},
  title           = {Search space definition and exploration for nonuniform data reuse opportunities in data-dominant applications},
  journal         = j-TODAES,
  volume          = 8,
  number          = 1,
  pages           = {125--139},
  month           = jan,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:05 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Edwards:2003:TCC,
  author          = {Edwards, Stephen A.},
  title           = {Tutorial: {Compiling} concurrent languages for sequential processors},
  journal         = j-TODAES,
  volume          = 8,
  number          = 2,
  pages           = {141--187},
  month           = apr,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Wu:2003:RBP,
  author          = {Wu, Guang-Ming and Chang, Yun-Chih and Chang, Yao-Wen},
  title           = {Rectilinear block placement using {B*}-trees},
  journal         = j-TODAES,
  volume          = 8,
  number          = 2,
  pages           = {188--202},
  month           = apr,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Kim:2003:MDO,
  author          = {Kim, Ki-Wook and Jung, Seong-Ook and Kim, Taewhan and Kang, Sung-Mo},
  title           = {Minimum delay optimization for domino logic circuits---a coupling-aware approach},
  journal         = j-TODAES,
  volume          = 8,
  number          = 2,
  pages           = {203--213},
  month           = apr,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Pinar:2003:CSI,
  author          = {Pinar, Ali and Liu, C. L.},
  title           = {Compacting sequences with invariant transition frequencies},
  journal         = j-TODAES,
  volume          = 8,
  number          = 2,
  pages           = {214--221},
  month           = apr,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Singhal:2003:SOA,
  author          = {Singhal, Vigyan and Pixley, Carl and Aziz, Adnan and Qadeer, Shaz and Brayton, Robert},
  title           = {Sequential optimization in the absence of global reset},
  journal         = j-TODAES,
  volume          = 8,
  number          = 2,
  pages           = {222--251},
  month           = apr,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Lee:2003:COV,
  author          = {Lee, Chingren and Lee, Jenq Kuen and Hwang, Tingting and Tsai, Shi-Chun},
  title           = {Compiler optimization on {VLIW} instruction scheduling for low power},
  journal         = j-TODAES,
  volume          = 8,
  number          = 2,
  pages           = {252--268},
  month           = apr,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Lopez-Vallejo:2003:HSP,
  author          = {L{\'o}pez-Vallejo, Marisa and L{\'o}pez, Juan Carlos},
  title           = {On the hardware-software partitioning problem: {System} modeling and partitioning techniques},
  journal         = j-TODAES,
  volume          = 8,
  number          = 3,
  pages           = {269--297},
  month           = jul,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Obenaus:2003:GFP,
  author          = {Obenaus, Stefan Thomas and Szymanski, Ted H.},
  title           = {{Gravity}: {Fast} placement for {$3$-D} {VLSI}},
  journal         = j-TODAES,
  volume          = 8,
  number          = 3,
  pages           = {298--315},
  month           = jul,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Yang:2003:CRD,
  author          = {Yang, X. and Wang, M. and Kastner, R. and Ghiasi, S. and Sarrafzadeh, M.},
  title           = {Congestion reduction during placement with provably good approximation bound},
  journal         = j-TODAES,
  volume          = 8,
  number          = 3,
  pages           = {316--333},
  month           = jul,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Constantinides:2003:SSA,
  author          = {Constantinides, G. A. and Cheung, P. Y. K. and Luk, W.},
  title           = {Synthesis of saturation arithmetic architectures},
  journal         = j-TODAES,
  volume          = 8,
  number          = 3,
  pages           = {334--354},
  month           = jul,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Kuchcinski:2003:CDS,
  author          = {Kuchcinski, Krzysztof},
  title           = {Constraints-driven scheduling and resource assignment},
  journal         = j-TODAES,
  volume          = 8,
  number          = 3,
  pages           = {355--383},
  month           = jul,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Lee:2003:ACG,
  author          = {Lee, J.-Y. and Park, I.-C.},
  title           = {Address code generation for {DSP} instruction-set architectures},
  journal         = j-TODAES,
  volume          = 8,
  number          = 3,
  pages           = {384--395},
  month           = jul,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Thu Aug 7 11:12:06 MDT 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Rawat:2003:I,
  author          = {Rawat, Shishpal and Wunderlich, Hans-Joachim},
  title           = {Introduction},
  journal         = j-TODAES,
  volume          = 8,
  number          = 4,
  pages           = {397--398},
  month           = oct,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Goel:2003:STA,
  author          = {Goel, Sandeep Kumar and Marinissen, Erik Jan},
  title           = {{SOC} test architecture design for efficient utilization of test bandwidth},
  journal         = j-TODAES,
  volume          = 8,
  number          = 4,
  pages           = {399--429},
  month           = oct,
  year            = 2003,
  CODEN           = {ATASFO},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Oct 31 06:04:08 MST 2003},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{El-Maleh:2003:TVD,
  author =       "Aiman H. El-Maleh and Yahya E. Osais",
  title =        "Test vector decomposition-based static compaction
                 algorithms for combinational circuits",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "430--459",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Reddy:2003:TDV,
  author =       "Sudhakar M. Reddy and Kohei Miyase and Seiji Kajihara
                 and Irith Pomeranz",
  title =        "On test data volume reduction for multiple scan chain
                 designs",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "460--469",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2003:TDC,
  author =       "Lei Li and Krishnendu Chakrabarty and Nur A. Touba",
  title =        "Test data compression using dictionaries with
                 selective entries and fixed-length indices",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "470--490",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Singh:2003:MST,
  author =       "Adit D. Singh and Markus Seuring and Michael
                 G{\"o}ssel and Egor S. Sogomonyan",
  title =        "Multimode scan: {Test} per clock {BIST} for {IP}
                 cores",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "491--505",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Nummer:2003:THP,
  author =       "Muhammad Nummer and Manoj Sachdev",
  title =        "Testing high-performance pipelined circuits with
                 slow-speed testers",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "506--521",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Parthasarathy:2003:PTA,
  author =       "Kumar Parthasarathy and Turker Kuyel and Dana Price
                 and Le Jin and Degang Chen and Randall Geiger",
  title =        "{BIST} and production testing of {ADCs} using
                 imprecise stimulus",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "522--545",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2003:CLF,
  author =       "Zhuo Li and Xiang Lu and Wangqi Qiu and Weiping Shi
                 and D. M. H. Walker",
  title =        "A circuit level fault model for resistive bridges",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "546--559",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Niggemeyer:2003:DAM,
  author =       "Dirk Niggemeyer and Elizabeth M. Rudnick",
  title =        "A data acquisition methodology for on-chip repair of
                 embedded memories",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "560--576",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Neuberger:2003:MBU,
  author =       "Gustavo Neuberger and Fernanda de Lima and Luigi Carro
                 and Ricardo Reis",
  title =        "A multiple bit upset tolerant {SRAM} memory",
  journal =      j-TODAES,
  volume =       "8",
  number =       "4",
  pages =        "577--590",
  month =        oct,
  year =         "2003",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Oct 31 06:04:08 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bunker:2004:FHS,
  author =       "Annette Bunker and Ganesh Gopalakrishnan and Sally A.
                 McKee",
  title =        "Formal hardware specification languages for protocol
                 compliance verification",
  journal =      j-TODAES,
  volume =       "9",
  number =       "1",
  pages =        "1--32",
  month =        jan,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jan 28 17:18:10 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2004:PMA,
  author =       "Hao Li and Srinivas Katkoori and Wai-Kei Mak",
  title =        "Power minimization algorithms for {LUT}-based {FPGA}
                 technology mapping",
  journal =      j-TODAES,
  volume =       "9",
  number =       "1",
  pages =        "33--51",
  month =        jan,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jan 28 17:18:10 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cho:2004:FMB,
  author =       "Jeonghun Cho and Yunheung Paek and David Whalley",
  title =        "Fast memory bank assignment for fixed-point digital
                 signal processors",
  journal =      j-TODAES,
  volume =       "9",
  number =       "1",
  pages =        "52--74",
  month =        jan,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jan 28 17:18:10 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Das:2004:MDR,
  author =       "Sandip Das and Susmita Sur-Kolay and Bhargab B.
                 Bhattacharya",
  title =        "{Manhattan}-diagonal routing in channels and
                 switchboxes",
  journal =      j-TODAES,
  volume =       "9",
  number =       "1",
  pages =        "75--104",
  month =        jan,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jan 28 17:18:10 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2004:BBA,
  author =       "Lieh-Ming Wu and Kuochen Wang and Chuang-Yi Chiu",
  title =        "A {BNF}-based automatic test program generator for
                 compatible microprocessor verification",
  journal =      j-TODAES,
  volume =       "9",
  number =       "1",
  pages =        "105--132",
  month =        jan,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Jan 28 17:18:10 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kjeldsberg:2004:SRE,
  author =       "Per Gunnar Kjeldsberg and Francky Catthoor and Einar
                 J. Aas",
  title =        "Storage requirement estimation for optimized design of
                 data intensive applications",
  journal =      j-TODAES,
  volume =       "9",
  number =       "2",
  pages =        "133--158",
  month =        apr,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sabade:2004:BTM,
  author =       "Sagar S. Sabade and Duncan M. Walker",
  title =        "{I$_{\mbox {DDX}}$}-based test methods: a survey",
  journal =      j-TODAES,
  volume =       "9",
  number =       "2",
  pages =        "159--198",
  month =        apr,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ma:2004:SCU,
  author =       "Yuchun Ma and Xianlong Hong and Sheqin Dong and Yici
                 Cai and Chung-Kuan Cheng and Jun Gu",
  title =        "Stairway compaction using corner block list and its
                 applications with rectilinear blocks",
  journal =      j-TODAES,
  volume =       "9",
  number =       "2",
  pages =        "199--211",
  month =        apr,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Murthy:2004:BMP,
  author =       "Praveen K. Murthy and Shuvra S. Bhattacharyya",
  title =        "Buffer merging---a powerful technique for reducing
                 memory requirements of synchronous dataflow
                 specifications",
  journal =      j-TODAES,
  volume =       "9",
  number =       "2",
  pages =        "212--237",
  month =        apr,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Doboli:2004:TLL,
  author =       "Alex Doboli and Nagu Dhanwada and Adrian Nunez-Aldana
                 and Ranga Vemuri",
  title =        "A two-layer library-based approach to synthesis of
                 analog systems from {VHDL-AMS} specifications",
  journal =      j-TODAES,
  volume =       "9",
  number =       "2",
  pages =        "238--271",
  month =        apr,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sundararajan:2004:NAI,
  author =       "Vijay Sundararajan and Sachin S. Sapatnekar and Keshab
                 K. Parhi",
  title =        "A new approach for integration of min-area retiming
                 and min-delay padding for simultaneously addressing
                 short-path and long-path constraints",
  journal =      j-TODAES,
  volume =       "9",
  number =       "3",
  pages =        "273--289",
  month =        jul,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lepak:2004:SSI,
  author =       "Kevin M. Lepak and Min Xu and Jun Chen and Lei He",
  title =        "Simultaneous shield insertion and net ordering for
                 capacitive and inductive coupling minimization",
  journal =      j-TODAES,
  volume =       "9",
  number =       "3",
  pages =        "290--309",
  month =        jul,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Vicente:2004:APT,
  author =       "Juan de Vicente and Juan Lanchares and Rom{\'a}n
                 Hermida",
  title =        "Annealing placement by thermodynamic combinatorial
                 optimization",
  journal =      j-TODAES,
  volume =       "9",
  number =       "3",
  pages =        "310--332",
  month =        jul,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dandalis:2004:ACE,
  author =       "Andreas Dandalis and Viktor K. Prasanna",
  title =        "An adaptive cryptographic engine for {Internet}
                 protocol security architectures",
  journal =      j-TODAES,
  volume =       "9",
  number =       "3",
  pages =        "333--353",
  month =        jul,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yang:2004:FVE,
  author =       "Jun Yang and Rajiv Gupta and Chuanjun Zhang",
  title =        "Frequent value encoding for low power data buses",
  journal =      j-TODAES,
  volume =       "9",
  number =       "3",
  pages =        "354--384",
  month =        jul,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dasdan:2004:EAF,
  author =       "Ali Dasdan",
  title =        "Experimental analysis of the fastest optimum cycle
                 ratio and mean algorithms",
  journal =      j-TODAES,
  volume =       "9",
  number =       "4",
  pages =        "385--418",
  month =        oct,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ghosh:2004:COE,
  author =       "Arijit Ghosh and Tony Givargis",
  title =        "Cache optimization for embedded processor cores: an
                 analytical approach",
  journal =      j-TODAES,
  volume =       "9",
  number =       "4",
  pages =        "419--440",
  month =        oct,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gupta:2004:CPC,
  author =       "Sumit Gupta and Rajesh Kumar Gupta and Nikil D. Dutt
                 and Alexandru Nicolau",
  title =        "Coordinated parallelizing compiler optimizations and
                 high-level synthesis",
  journal =      j-TODAES,
  volume =       "9",
  number =       "4",
  pages =        "441--470",
  month =        oct,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cota:2004:RCN,
  author =       "{\'E}rika Cota and Luigi Carro and Marcelo
                 Lubaszewski",
  title =        "Reusing an on-chip network for the test of core-based
                 systems",
  journal =      j-TODAES,
  volume =       "9",
  number =       "4",
  pages =        "471--499",
  month =        oct,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Krishna:2004:AHE,
  author =       "C. V. Krishna and Abhijit Jas and Nur A. Touba",
  title =        "Achieving high encoding efficiency with partial
                 dynamic {LFSR} reseeding",
  journal =      j-TODAES,
  volume =       "9",
  number =       "4",
  pages =        "500--516",
  month =        oct,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hung:2004:SCR,
  author =       "William N. N. Hung and Xiaoyu Song and El Mostapha
                 Aboulhamid and Andrew Kennings and Alan Coppola",
  title =        "Segmented channel routability via satisfiability",
  journal =      j-TODAES,
  volume =       "9",
  number =       "4",
  pages =        "517--528",
  month =        oct,
  year =         "2004",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Nov 4 08:12:30 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dutt:2005:E,
  author =       "Nikil Dutt",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "1--2",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cong:2005:TMA,
  author =       "Jason Cong and Hui Huang and Xin Yuan",
  title =        "Technology mapping and architecture evaluation for $ k
                 / m$-macrocell-based {FPGAs}",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "3--23",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ruan:2005:BEL,
  author =       "Shanq-Jang Ruan and Kun-Lin Tsai and Edwin Naroska and
                 Feipei Lai",
  title =        "Bipartitioning and encoding in low-power pipelined
                 circuits",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "24--32",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Memik:2005:SAO,
  author =       "Seda Ogrenci Memik and Ryan Kastner and Elaheh
                 Bozorgzadeh and Majid Sarrafzadeh",
  title =        "A scheduling algorithm for optimization and early
                 planning in high-level synthesis",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "33--57",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Adya:2005:CTM,
  author =       "Saurabh N. Adya and Igor L. Markov",
  title =        "Combinatorial techniques for mixed-size placement",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "58--90",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Nourani:2005:RHE,
  author =       "Mehrdad Nourani and Mohammad H. Tehranipour",
  title =        "{RL-Huffman} encoding for test compression and power
                 reduction in scan applications",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "91--115",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jan:2005:GMR,
  author =       "Gene Eu Jan and Ki-Yin Chang and Su Gao and Ian
                 Parberry",
  title =        "A $4$-geometry maze router and its application on
                 multiterminal nets",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "116--135",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Arato:2005:AAH,
  author =       "P{\'e}ter Arat{\'o} and Zolt{\'a}n {\'A}d{\'a}m Mann
                 and Andr{\'a}s Orb{\'a}n",
  title =        "Algorithmic aspects of hardware\slash software
                 partitioning",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "136--156",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kagaris:2005:UMP,
  author =       "Dimitri Kagaris",
  title =        "A unified method for phase shifter computation",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "157--167",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kao:2005:EAF,
  author =       "Chi-Chou Kao and Yen-Tai Lai",
  title =        "An efficient algorithm for finding the minimal-area
                 {FPGA} technology mapping",
  journal =      j-TODAES,
  volume =       "10",
  number =       "1",
  pages =        "168--186",
  month =        jan,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Apr 14 10:34:36 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chabini:2005:SOR,
  author =       "Noureddine Chabini and El Mostapha Aboulhamid and
                 Isma{\"\i}l Chabini and Yvon Savaria",
  title =        "Scheduling and optimal register placement for
                 synchronous circuits derived using software pipelining
                 techniques",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "187--204",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cao:2005:SSL,
  author =       "Aiqun Cao and Naran Sirisantana and Cheng-Kok Koh and
                 Kaushik Roy",
  title =        "Synthesis of skewed logic circuits",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "205--228",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kadayif:2005:OIT,
  author =       "Ismail Kadayif and Anand Sivasubramaniam and Mahmut
                 Kandemir and Gokul Kandiraju and Guangyu Chen",
  title =        "Optimizing instruction {TLB} energy using software and
                 hardware techniques",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "229--257",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2005:ETT,
  author =       "Xiao Liu and Michael S. Hsiao and Sreejit Chakravarty
                 and Paul J. Thadikaran",
  title =        "Efficient techniques for transition testing",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "258--278",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Poon:2005:DPM,
  author =       "Kara K. W. Poon and Steven J. E. Wilton and Andy Yan",
  title =        "A detailed power model for field-programmable gate
                 arrays",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "279--302",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bhattacharya:2005:OWP,
  author =       "Soumendu Bhattacharya and Abhijit Chatterjee",
  title =        "Optimized wafer-probe and assembled package test
                 design for analog circuits",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "303--329",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mohanty:2005:EED,
  author =       "Saraju P. Mohanty and N. Ranganathan",
  title =        "Energy-efficient datapath scheduling using multiple
                 voltages and dynamic clocking",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "330--353",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Davoodi:2005:VSU,
  author =       "Azadeh Davoodi and Ankur Srivastava",
  title =        "Voltage scheduling under unpredictabilities: a risk
                 management paradigm",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "354--368",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2005:EAV,
  author =       "Zhong Wang and Xiaobo Sharon Hu",
  title =        "Energy-aware variable partitioning and instruction
                 scheduling for multibank memory architectures",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "369--388",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cong:2005:LSC,
  author =       "Jason Cong and Joseph R. Shinnerl and Min Xie and Tim
                 Kong and Xin Yuan",
  title =        "Large-scale circuit placement",
  journal =      j-TODAES,
  volume =       "10",
  number =       "2",
  pages =        "389--430",
  month =        apr,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Apr 26 10:39:39 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Paul:2005:HLM,
  author =       "Joann M. Paul and Donald E. Thomas and Andrew S.
                 Cassidy",
  title =        "High-level modeling and simulation of single-chip
                 programmable heterogeneous multiprocessors",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "431--461",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Roy:2005:FSV,
  author =       "Arnab Roy and S. K. Panda and Rajeev Kumar and P. P.
                 Chakrabarti",
  title =        "A framework for systematic validation and debugging of
                 pipeline simulators",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "462--491",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Banerjee:2005:OFT,
  author =       "Ansuman Banerjee and Pallab Dasgupta",
  title =        "The open family of temporal logics: {Annotating}
                 temporal operators with input constraints",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "492--522",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Koushanfar:2005:BST,
  author =       "Farinaz Koushanfar and Inki Hong and Miodrag
                 Potkonjak",
  title =        "Behavioral synthesis techniques for intellectual
                 property protection",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "523--545",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gupta:2005:RAS,
  author =       "Puneet Gupta and Andrew B. Kahng and Stefanus Mantik",
  title =        "Routing-aware scan chain ordering",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "546--560",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xiang:2005:AIP,
  author =       "Hua Xiang and Xiaoping Tang and Martin D. F. Wong",
  title =        "An algorithm for integrated pin assignment and buffer
                 planning",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "561--572",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2005:PDD,
  author =       "Jaehwan John Lee and Vincent John {Mooney III}",
  title =        "An {$ O(\min (m, n)) $} parallel deadlock detection
                 algorithm",
  journal =      j-TODAES,
  volume =       "10",
  number =       "3",
  pages =        "573--586",
  month =        jul,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 22 11:16:52 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Harris:2005:I,
  author =       "Ian G. Harris",
  title =        "Introduction",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "587--588",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Suhaib:2005:XIM,
  author =       "Syed M. Suhaib and Deepak A. Mathaikutty and Sandeep
                 K. Shukla and David Berner",
  title =        "{XFM}: an incremental methodology for developing
                 formal models",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "589--609",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Fujita:2005:ECB,
  author =       "Masahiro Fujita",
  title =        "Equivalence checking between behavioral and {RTL}
                 descriptions with virtual controllers and datapaths",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "610--626",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Feng:2005:UDP,
  author =       "Tao Feng and Li-C. Wang and Kwang-Ting (Tim) Cheng and
                 Chih-Chang (Andy) Lin",
  title =        "Using $2$-domain partitioned {OBDD} data structure in
                 an enhanced symbolic simulator",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "627--650",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Higgins:2005:SDA,
  author =       "Jason T. Higgins and Mark D. Aagaard",
  title =        "Simplifying the design and automating the verification
                 of pipelines with structural hazards",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "651--672",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Shamshiri:2005:ILT,
  author =       "Saeed Shamshiri and Hadi Esmaeilzadeh and Zainalabedin
                 Navabi",
  title =        "Instruction-level test methodology for {CPU} core
                 self-testing",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "673--689",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Al-Yamani:2005:TCE,
  author =       "Ahmad A. Al-Yamani and Edward J. McCluskey",
  title =        "Test chip experimental results on high-level
                 structural test",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "690--701",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ciordas:2005:EBM,
  author =       "Calin Ciordas and Twan Basten and Andrei
                 R{\u{a}}dulescu and Kees Goossens and Jef {Van
                 Meerbergen}",
  title =        "An event-based monitoring service for networks on
                 chip",
  journal =      j-TODAES,
  volume =       "10",
  number =       "4",
  pages =        "702--723",
  month =        oct,
  year =         "2005",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 13 07:41:02 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dutt:2006:E,
  author =       "Nikil Dutt",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "1--2",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Givargis:2006:ZCI,
  author =       "Tony Givargis",
  title =        "Zero cost indexing for improved processor cache
                 performance",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "3--25",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Constantinides:2006:WLO,
  author =       "George A. Constantinides",
  title =        "Word-length optimization for differentiable nonlinear
                 systems",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "26--43",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Su:2006:AMS,
  author =       "Qing Su and Jamil Kawa and Charles Chiang and Yehia
                 Massoud",
  title =        "Accurate modeling of substrate resistive coupling for
                 floating substrates",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "44--51",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Davoodi:2006:ETG,
  author =       "Azadeh Davoodi and Ankur Srivastava",
  title =        "Effective techniques for the generalized low-power
                 binding problem",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "52--69",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Schaumont:2006:ICE,
  author =       "Patrick Schaumont and Doris Ching and Ingrid
                 Verbauwhede",
  title =        "An interactive codesign environment for
                 domain-specific coprocessors",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "70--87",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jiang:2006:RCD,
  author =       "Iris Hui-Ru Jiang and Song-Ra Pan and Yao-Wen Chang
                 and Jing-Yang Jou",
  title =        "Reliable crosstalk-driven interconnect optimization",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "88--103",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kulkarni:2006:CTA,
  author =       "Dhananjay Kulkarni and Walid A. Najjar and Robert
                 Rinker and Fadi J. Kurdahi",
  title =        "Compile-time area estimation for {LUT}-based {FPGAs}",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "104--122",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Shrivastava:2006:CFC,
  author =       "Aviral Shrivastava and Partha Biswas and Ashok Halambi
                 and Nikil Dutt and Alex Nicolau",
  title =        "Compilation framework for code size reduction using
                 reduced bit-width {ISAs (rISAs)}",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "123--146",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{You:2006:CLP,
  author =       "Yi-Ping You and Chingren Lee and Jenq Kuen Lee",
  title =        "Compilers for leakage power reduction",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "147--164",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Shao:2006:LST,
  author =       "Zili Shao and Bin Xiao and Chun Xue and Qingfeng Zhuge
                 and Edwin H.-M. Sha",
  title =        "Loop scheduling with timing and switching-activity
                 minimization for {VLIW DSP}",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "165--185",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mohanty:2006:IMS,
  author =       "Saraju P. Mohanty and N. Ranganathan and Sunil K.
                 Chappidi",
  title =        "{ILP} models for simultaneous energy and transient
                 power minimization during behavioral synthesis",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "186--212",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ozdal:2006:TLB,
  author =       "Muhammet Mustafa Ozdal and Martin D. F. Wong",
  title =        "Two-layer bus routing for high-speed printed circuit
                 boards",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "213--227",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kandemir:2006:IEB,
  author =       "M. Kandemir and J. Ramanujam and U. Sezer",
  title =        "Improving the energy behavior of block buffering using
                 compiler optimizations",
  journal =      j-TODAES,
  volume =       "11",
  number =       "1",
  pages =        "228--250",
  month =        jan,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Apr 12 07:15:39 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ayala-Rincon:2006:PTS,
  author =       "M. Ayala-Rinc{\'o}n and C. H. Llanos and R. P. Jacobi
                 and R. W. Hartenstein",
  title =        "Prototyping time- and space-efficient computations of
                 algebraic operations over dynamically reconfigurable
                 systems modeled by rewriting-logic",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "251--281",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Absar:2006:RAI,
  author =       "Javed Absar and Francky Catthoor",
  title =        "Reuse analysis of indirectly indexed arrays",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "282--305",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dasdan:2006:HIT,
  author =       "Ali Dasdan and Ivan Hom",
  title =        "Handling inverted temperature dependence in static
                 timing analysis",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "306--324",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2006:ETO,
  author =       "Zuoyuan Li and Xianlong Hong and Qiang Zhou and Jinian
                 Bian and Hannah H. Yang and Vijay Pitchumani",
  title =        "Efficient thermal-oriented {$3$D} floorplanning and
                 thermal via planning for two-stacked-die integration",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "325--345",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Padmanaban:2006:IGM,
  author =       "Saravanan Padmanaban and Spyros Tragoudas",
  title =        "Implicit grading of multiple path delay faults",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "346--361",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2006:OSM,
  author =       "Deming Chen and Jason Cong and Junjuan Xu",
  title =        "Optimal simultaneous module and multivoltage
                 assignment for low power",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "362--386",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhu:2006:CZD,
  author =       "Haikun Zhu and Chung-Kuan Cheng and Ronald Graham",
  title =        "On the construction of zero-deficiency parallel prefix
                 circuits with minimum depth",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "387--409",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kandemir:2006:REC,
  author =       "Mahmut Taylan Kandemir",
  title =        "Reducing energy consumption of multiprocessor {SoC}
                 architectures by exploiting memory bank locality",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "410--441",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Su:2006:CTD,
  author =       "Fei Su and Sule Ozev and Krishnendu Chakrabarty",
  title =        "Concurrent testing of digital microfluidics-based
                 biochips",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "442--464",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Atienza:2006:SDM,
  author =       "David Atienza and Jose M. Mendias and Stylianos
                 Mamagkakis and Dimitrios Soudris and Francky Catthoor",
  title =        "Systematic dynamic memory management design
                 methodology for reduced memory footprint",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "465--489",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2006:LVA,
  author =       "Wei Li and Daniel Blakely and Scott {Van Sooy} and
                 Keven Dunn and David Kidd and Robert Rogenmoser and
                 Dian Zhou",
  title =        "{LVS} verification across multiple power domains for a
                 quad-core microprocessor",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "490--500",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cheatham:2006:SFT,
  author =       "Jason A. Cheatham and John M. Emmert and Stan
                 Baumgart",
  title =        "A survey of fault tolerant methodologies for {FPGAs}",
  journal =      j-TODAES,
  volume =       "11",
  number =       "2",
  pages =        "501--533",
  month =        apr,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:18 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pedram:2006:ISI,
  author =       "Massoud Pedram",
  title =        "Introduction to special issue: {Novel} paradigms in
                 system-level design",
  journal =      j-TODAES,
  volume =       "11",
  number =       "3",
  pages =        "535--536",
  month =        jul,
  year =         "2006",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Aug 23 10:13:19 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pinto:2006:SLD,
author = "Alessandro Pinto and Alvise Bonivento and Alberto L.
Sangiovanni-Vincentelli and Roberto Passerone and Marco
Sgroi",
title = "System level design paradigms: {Platform-based} design
and communication synthesis",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "537--563",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Marculescu:2006:CCR,
author = "Radu Marculescu and Umit Y. Ogras and Nicholas H.
Zamora",
title = "Computation and communication refinement for
multiprocessor {SoC} design: a system-level
perspective",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "564--592",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pop:2006:AOD,
author = "Paul Pop and Petru Eles and Zebo Peng and Traian Pop",
title = "Analysis and optimization of distributed real-time
embedded systems",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "593--625",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mishra:2006:ADL,
author = "Prabhat Mishra and Aviral Shrivastava and Nikil Dutt",
title = "Architecture description language {(ADL)-driven}
software toolkit generation for architectural
exploration of programmable {SOCs}",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "626--658",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lysecky:2006:WP,
author = "Roman Lysecky and Greg Stitt and Frank Vahid",
title = "{Warp Processors}",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "659--681",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Su:2006:MPF,
author = "Fei Su and Krishnendu Chakrabarty",
title = "Module placement for fault-tolerant
microfluidics-based biochips",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "682--710",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hanchate:2006:GTF,
author = "Narender Hanchate and Nagarajan Ranganathan",
title = "A game-theoretic framework for multimetric
optimization of interconnect delay, power, and
crosstalk noise during wire sizing",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "711--739",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2006:SPC,
author = "Gang Chen and Jason Cong",
title = "Simultaneous placement with clustering and
duplication",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "740--772",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bhanja:2006:SFG,
author = "Sanjukta Bhanja and Karthikeyan Lingasubramanian and
Nagarajan Ranganathan",
title = "A stimulus-free graphical probabilistic switching
model for sequential circuits using dynamic {Bayesian}
networks",
journal = j-TODAES,
volume = "11",
number = "3",
pages = "773--796",
month = jul,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Aug 23 10:13:19 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cao:2006:POS,
author = "Aiqun Cao and Ruibing Lu and Chen Li and Cheng-Kok
Koh",
title = "Postlayout optimization for synthesis of {Domino}
circuits",
journal = j-TODAES,
volume = "11",
number = "4",
pages = "797--821",
month = oct,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 15 06:47:05 MST 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Nacul:2006:STC,
author = "Andr{\'e} C. N{\'a}cul and Tony Givargis",
title = "Synthesis of time-constrained multitasking embedded
software",
journal = j-TODAES,
volume = "11",
number = "4",
pages = "822--847",
month = oct,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 15 06:47:05 MST 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kang:2006:STA,
author = "Kunhyuk Kang and Bipul C. Paul and Kaushik Roy",
title = "Statistical timing analysis using levelized covariance
propagation considering systematic and random
variations of process parameters",
journal = j-TODAES,
volume = "11",
number = "4",
pages = "848--879",
month = oct,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 15 06:47:05 MST 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kuo:2006:DID,
author = "Wu-An Kuo and Tingting Hwang and Allen C.-H. Wu",
title = "Decomposition of instruction decoders for low-power
designs",
journal = j-TODAES,
volume = "11",
number = "4",
pages = "880--889",
month = oct,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 15 06:47:05 MST 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2006:CML,
author = "Yi-Yu Liu and Kuo-Hua Wang and Tingting Hwang",
title = "Crosstalk minimization in logic synthesis for {PLAs}",
journal = j-TODAES,
volume = "11",
number = "4",
pages = "890--915",
month = oct,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 15 06:47:05 MST 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Goren:2006:TSG,
author = "Sezer G{\"o}ren and F. Joel Ferguson",
title = "Test sequence generation for controller verification
and test with high coverage",
journal = j-TODAES,
volume = "11",
number = "4",
pages = "916--938",
month = oct,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 15 06:47:05 MST 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2006:MWR,
author = "Zhong-Zhen Wu and Shih-Chieh Chang",
title = "Multiple wire reconnections based on implication flow
graph",
journal = j-TODAES,
volume = "11",
number = "4",
pages = "939--952",
month = oct,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 15 06:47:05 MST 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2006:PDT,
author = "Chi-Shong Wang and Chingwei Yeh",
title = "Performance-driven technology mapping with {MSG}
partition and selective gate duplication",
journal = j-TODAES,
volume = "11",
number = "4",
pages = "953--973",
month = oct,
year = "2006",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 15 06:47:05 MST 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gangwar:2007:IIC,
author = "Anup Gangwar and M. Balakrishnan and Anshul Kumar",
title = "Impact of intercluster communication mechanisms on
{ILP} in clustered {VLIW} architectures",
journal = j-TODAES,
volume = "12",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2007",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:29 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "VLIW processors have started gaining acceptance in the
embedded systems domain. However, monolithic register
file VLIW processors with a large number of functional
units are not viable. This is because of the need for a
large number of ports to support FU requirements, which
makes them expensive and extremely slow. A simple
solution is to break the register file into a number of
smaller register files with a subset of FUs connected
to it. These architectures are termed clustered VLIW
processors. In this article, we first build a case for
clustered VLIW processors with four or more clusters by
showing that the achievable ILP in most of the media
applications for a 16 ALU and 8 LD/ST VLIW processor is
around 20. We then provide a classification of the
intercluster interconnection design space, and show
that a large part of this design space is currently
unexplored. Next, using our performance evaluation
methodology, we evaluate a subset of this design space
and show that the most commonly used type of
interconnection, RF-to-RF, fails to meet achievable
performance by a large factor, while certain other
types of interconnections can lower this gap
considerably. We also establish that this behavior is
heavily application dependent, emphasizing the
importance of application-specific architecture
exploration. We also present results about the
statistical behavior of these different architectures
by varying the number of clusters in our framework from
4 to 16. These results clearly show the advantages of
one specific architecture over others. Finally, based
on our results, we propose a new interconnection
network, which should lower this performance gap.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "ASIP; clustered VLIW processors; performance
evaluation; VLIW",
}
@Article{Zamora:2007:SLP,
author = "Nicholas H. Zamora and Xiaoping Hu and Radu
Marculescu",
title = "System-level performance\slash power analysis for
platform-based design of multimedia applications",
journal = j-TODAES,
volume = "12",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2007",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:29 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The objective of this article is to introduce the use
of Stochastic Automata Networks (SANs) as an effective
formalism for application-architecture modeling in
system-level average-case analysis for platform-based
design. By platform, we mean a family of heterogeneous
architectures that satisfy a set of architectural
constraints imposed to allow re-use of hardware and
software components. More precisely, we show how SANs
can be used early in the design cycle to identify the
best performance/power trade-offs among several
application-architecture combinations. Having this
information available not only helps avoid lengthy
simulations for predicting power and performance
figures, but also enables efficient mapping of
different applications onto a chosen platform. We
illustrate the benefits of our methodology by using the
``Picture-in-Picture'' video decoder as a driver
application.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "average-case analysis; design space exploration;
hardware/software codesign; Markov chains; performance
models; platform-based design; stochastic automata
networks (SANs)",
}
@Article{Sham:2007:ARD,
author = "Chiu-Wing Sham and Evangeline F. Y. Young",
title = "Area reduction by deadspace utilization on
interconnect optimized floorplan",
journal = j-TODAES,
volume = "12",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2007",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:29 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Interconnect optimization has become the major concern
in floorplanning. Many approaches would use simulated
annealing (SA) with a cost function composed of a
weighted sum of area, wirelength, and interconnect
cost. These approaches can reduce the interconnect cost
efficiently but the area penalty of the interconnect
optimized floorplan is usually quite large. In this
article, we propose an approach called deadspace
utilization (DSU) to reclaim the unused area of an
interconnect optimized floorplan by linear programming.
Since modules are not necessarily rectangular in shape
in floorplanning, some deadspace can be redistributed
to the modules to increase the area occupied by each
module. If the area of each module can be expanded by
the same ratio, the whole floorplan can be compacted by
that ratio to give a smaller floorplan. However, we
will limit the compaction ratio to prevent
overcongestion. Experiments show that we can apply this
deadspace utilization technique to reduce the area and
total wirelength of an interconnect optimized floorplan
further while the routability can be maintained at the
same time.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "area reduction; Floorplanning",
}
@Article{Li:2007:SBC,
author = "Lei Li and Zhanglei Wang and Krishnendu Chakrabarty",
title = "Scan-{BIST} based on cluster analysis and the encoding
of repeating sequences",
journal = j-TODAES,
volume = "12",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2007",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:29 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present a built-in self-test (BIST) approach for
full-scan designs that extracts the most frequently
occurring sequences from deterministic test patterns.
The extracted sequences are stored on-chip, and are
used during test application. Three sets of test
patterns are applied to the circuit under test during a
BIST test session; these include pseudorandom patterns,
semirandom patterns, and deterministic patterns. The
semirandom patterns are generated based on the stored
sequences and they are more likely to detect
hard-to-detect faults than pseudorandom patterns. The
deterministic patterns are encoded using either the
stored sequences or the LFSR reseeding technique to
reduce test data volume. We use the cluster analysis
technique for sequence extraction to reduce the amount
of data to be stored. Experimental results for the
ISCAS-89 benchmark circuits show that the proposed
approach often requires less on-chip storage and test
data volume than other recent BIST methods.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Built-in self-test (BIST); clustering test data
volume; test compression",
}
@Article{Cai:2007:WAD,
author = "Yuan Cai and Marcus T. Schmitz and Bashir M.
Al-Hashimi and Sudhakar M. Reddy",
title = "Workload-ahead-driven online energy minimization
techniques for battery-powered embedded systems with
time-constraints",
journal = j-TODAES,
volume = "12",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2007",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:29 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article proposes a new online voltage scaling
(VS) technique for battery-powered embedded systems
with real-time constraints. The VS technique takes into
account the execution times and discharge currents of
tasks to further reduce the battery charge consumption
when compared to the recently reported slack forwarding
technique [Ahmed and Chakrabarti 2004], while
maintaining low online complexity of $O(1)$.
Furthermore, we investigate the impact of online
rescheduling and remapping on the battery charge
consumption for tasks with data dependency which has
not been explicitly addressed in the literature and
propose a novel rescheduling/remapping technique.
Finally, we take leakage power into consideration and
extend the proposed online techniques to include
adaptive body biasing (ABB) which is used to reduce the
leakage power. We demonstrate and compare the
efficiency of the presented techniques using seven
real-life benchmarks and numerous automatically
generated examples.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "adaptive body biasing; battery; Dynamic voltage
scaling; embedded systems",
}
@Article{Zhu:2007:HMF,
author = "Xinping Zhu and Sharad Malik",
title = "A hierarchical modeling framework for on-chip
communication architectures of multiprocessing {SoCs}",
journal = j-TODAES,
volume = "12",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2007",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:29 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In multiprocessor-based SoCs, optimizing the
communication architecture is often as important, if
not more important, than optimizing the computation
architecture. While there are mature platforms and
techniques for the modeling and evaluation of
architectures of processing elements, the same is not
true for the communication architectures. This article
presents an application-driven retargetable prototyping
platform that fills this gap. This environment aims to
facilitate the design exploration of the communication
subsystem through application-level execution-driven
simulations and quantitative analysis. Based on an
analysis of a wide range of on-chip communication
architectures, we describe how a specific hierarchical
class library can be used to develop new on-chip
communication architectures, or variants of existing
ones with relatively little incremental effort. We
demonstrate this through three case studies including
two commercial on-chip bus systems and an on-chip
packet switching network. Here we show that, through
careful analysis and construction, it is possible for
the modeling environment to support the common features
of these architectures as part of the library and
permit instantiation of the individual architectures as
variants of the library design. Consequently,
system-level design choices regarding the communication
architecture can be made with high confidence in the
early stages of design. In addition to improving design
quality, this methodology also results in significantly
shortening design-time.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "bus; design exploration; multiprocessor system;
network-on-chip; object-oriented modeling; on-chip
communication architecture; packet-switching network;
Retargetable simulation",
}
@Article{Majumder:2007:HPV,
author = "Subhashis Majumder and Susmita Sur-Kolay and Bhargab
B. Bhattacharya and Swarup Kumar Das",
title = "Hierarchical partitioning of {VLSI} floorplans by
staircases",
journal = j-TODAES,
volume = "12",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2007",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:29 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article addresses the problem of recursively
bipartitioning a given floorplan F using monotone
staircases. At each level of the hierarchy, a monotone
staircase from one corner of F to its opposite corner
is identified, such that (i) the two parts of the
bipartition are nearly equal in area (or in the number
of blocks), and (ii) the number of nets crossing the
staircase is minimal. The problem of area-balanced
bipartitioning is shown to be NP-hard, and a
maxflow-based heuristic is proposed. Such a hierarchy
may be useful to repeater placement in deep-submicron
physical design, and also to global routing.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "balanced bipartitioning; Floorplanning; global
routing; network flow; NP-completeness",
}
@Article{Lee:2007:ISS,
author = "Jong-Eun Lee and Kiyoung Choi and Nikil D. Dutt",
title = "Instruction set synthesis with efficient instruction
encoding for configurable processors",
journal = j-TODAES,
volume = "12",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2007",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:29 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Application-specific instructions can significantly
improve the performance, energy-efficiency, and code
size of configurable processors. While generating new
instructions from application-specific operation
patterns has been a common way to improve the
instruction set (IS) of a configurable processor,
automating the design of ISs for given applications
poses new challenges---how to create as well as utilize
new instructions in a systematic manner, and how to
choose the best set of application-specific
instructions considering the various effects the new
instructions may have on the data path and the
compilation? To address these problems, we present a
novel IS synthesis framework that optimizes the IS
through an efficient instruction encoding for the given
application as well as for the given data path
architecture. We first build a library of new
instructions created with various encoding alternatives
taking into account the data path architecture
constraints, and then select the best set of
instructions while satisfying the instruction bitwidth
constraint. We formulate the problem using integer
linear programming and also present an effective
heuristic algorithm. Experimental results using our
technique generate ISs that show improvements of up to
about 40\% over the native IS for several application
benchmarks running on typical embedded RISC
processors.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Application-specific instruction set processor (ASIP);
bitwidth-economical; configurable processor;
instruction encoding; ISA customization and
specialization",
}
@Article{Dutt:2007:E,
author = "Nikil Dutt",
title = "Editorial",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "9:1--9:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230801",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2007:DIC,
author = "Chao Wang and Zijiang Yang and Franjo
Ivan{\v{c}}i{\'c} and Aarti Gupta",
title = "Disjunctive image computation for software
verification",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "10:1--10:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230802",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Existing BDD-based symbolic algorithms designed for
hardware designs do not perform well on software
programs. We propose novel techniques based on unique
characteristics of software programs. Our algorithm
divides an image computation step into a disjunctive
set of easier ones that can be performed in isolation.
We use hypergraph partitioning to minimize the number
of live variables in each disjunctive component, and
variable scopes to simplify transition relations and
reachable state subsets. Our experiments on nontrivial
C programs show that BDD-based symbolic algorithms can
directly handle software models with a much larger
number of state variables than for hardware designs.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "binary decision diagram; formal verification; image
computation; Model checking; reachability analysis",
}
@Article{Mochocki:2007:TOA,
author = "Bren Mochocki and Xiaobo Sharon Hu and Gang Quan",
title = "Transition-overhead-aware voltage scheduling for
fixed-priority real-time systems",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "11:1--11:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230803",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Time transition overhead is a critical problem for
hard real-time systems that employ dynamic voltage
scaling (DVS) for power and energy management. While it
is a common practice of much previous work to ignore
transition overhead, these algorithms cannot guarantee
deadlines and/or are less effective in saving energy
when transition overhead is significant and not
appropriately dealt with. In this article we introduce
two techniques, one offline and one online, to
correctly account for transition overhead in preemptive
fixed-priority real-time systems. We present several
DVS scheduling algorithms that implement these methods
that can guarantee task deadlines under arbitrarily
large transition time overheads and reduce energy
consumption by as much as 40\% when compared to
previous methods.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Dynamic voltage scaling; fixed priority; low power;
scheduling; transition overhead",
}
@Article{Chang:2007:PLP,
author = "Hongliang Chang and Sachin S. Sapatnekar",
title = "Prediction of leakage power under process
uncertainties",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "12:1--12:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230804",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we present a method to analyze the
total leakage current of a circuit under process
variations, considering interdie and intradie
variations as well as the effect of the spatial
correlations of intradie variations. The approach
considers both the subthreshold and gate tunneling
leakage power, as well as their interactions. With
process variations, each leakage component is
approximated by a lognormal distribution, and the total
chip leakage is computed as a sum of the correlated
lognormals. Since the lognormals to be summed are large
in number and have complicated correlation structures
due to both spatial correlations and the correlation
among different leakage mechanisms, we propose an
efficient method to reduce the number of correlated
lognormals for summation to a manageable quantity. We
do so by identifying dominant states of leakage
currents and taking advantage of the spatial
correlation model and input states at the gates. An
improved approach utilizing the principal components
computed from spatially correlated process parameters
is also proposed to further improve runtime efficiency.
We show that the proposed methods are effective in
predicting the probability distribution of total chip
leakage, and that ignoring spatial correlations can
underestimate the standard deviation of full-chip
leakage power.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Circuit; leakage; process variation; yield",
}
@Article{Mohanty:2007:MBE,
author = "Sumit Mohanty and Viktor K. Prasanna",
title = "A model-based extensible framework for efficient
application design using {FPGA}",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "13:1--13:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230805",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "For an FPGA designer, several choices are available in
terms of target FPGA devices, IP-cores, algorithms,
synthesis options, runtime reconfiguration, degrees of
parallelism, among others, while implementing a design.
Evaluation of design alternatives in the early stages
of the design cycle is important because the choices
made can have a critical impact on the performance of
the final design. However, a large number of
alternatives not only results in a large number of
designs, but also makes it a hard problem to
efficiently manage, simulate, and evaluate them. In
this article, we present a framework for FPGA-based
application design that addresses the aforementioned
issues. This framework supports a hierarchical modeling
approach that integrates application and device
modeling techniques and allows development of a library
of models for design reuse. The framework integrates a
high-level performance estimator for rapid estimation
of the latency, area, and energy of the designs. In
addition, a design space exploration tool allows
efficient evaluation of candidate designs against the
given performance requirements. The framework also
supports extension through integration of widely used
tools for FPGA-based design while presenting a unified
environment for different target FPGAs. We demonstrate
our framework through the modeling and performance
estimation of a signal processing kernel and the design
of end-to-end applications.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design tool; extensible; Modeling; reuse",
}
@Article{Tang:2007:PDF,
author = "Weiyu Tang and Arun Kejariwal and Alexander V.
Veidenbaum and Alexandru Nicolau",
title = "A predictive decode filter cache for reducing power
consumption in embedded processors",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "14:1--14:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230806",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With advances in semiconductor technology, power
management has increasingly become a very important
design constraint in processor design. In embedded
processors, instruction fetch and decode consume more
than 40\% of processor power. This calls for
development of power minimization techniques for the
fetch and decode stages of the processor pipeline. For
this, filter cache has been proposed as an
architectural extension for reducing the power
consumption. A filter cache is placed between the CPU
and the instruction cache (I-cache) to provide the
instruction stream. A filter cache has the advantages
of shorter access time and lower power consumption.
However, the downside of a filter cache is a possible
performance loss in case of cache misses. \par
In this article, we present a novel technique---decode
filter cache (DFC)---for minimizing power consumption
with minimal performance impact. The DFC stores decoded
instructions. Thus, a hit in the DFC eliminates
instruction fetch and its subsequent decoding. The
bypassing of both instruction fetch and decode reduces
processor power. We present a runtime approach for
predicting whether the next fetch source is present in
the DFC. In case a miss is predicted, we reduce the
miss penalty by accessing the I-cache directly. We
propose to classify instructions as cacheable or
noncacheable, depending on the decode width. For
efficient use of the cache space, a sectored cache
design is used for the DFC so that both cacheable and
noncacheable instructions can coexist in the DFC
sector. Experimental results show that the DFC reduces
processor power by 34\% on an average and our next
fetch prediction mechanism reduces miss penalty by more
than 91\%.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Cache; embedded processors; power optimization",
}
@Article{Issenin:2007:DDR,
author = "Ilya Issenin and Erik Brockmeyer and Miguel Miranda
and Nikil Dutt",
title = "{DRDU}: a data reuse analysis technique for efficient
scratch-pad memory management",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "15:1--15:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230807",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In multimedia and other streaming applications, a
significant portion of energy is spent on data
transfers. Exploiting data reuse opportunities in the
application, we can reduce this energy by making copies
of frequently used data in a small local memory and
replacing speed- and power-inefficient transfers from
main off-chip memory by more efficient local data
transfers. In this article we present an automated
approach for analyzing these opportunities in a program
that allows modification of the program to use custom
scratch-pad memory configurations comprising a
hierarchical set of buffers for local storage of
frequently reused data. Using our approach we are able
to both reduce energy consumption of the memory
subsystem when using a scratch-pad memory by about a
factor of two, on average, and improve memory system
performance compared to a cache of the same size.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "compiler analysis; data reuse analysis; memory
hierarchy; Scratch-pad memory management",
}
@Article{Hosseinabady:2007:LTA,
author = "Mohammad Hosseinabady and Pejman Lotfi-Kamran and
Zainalabedin Navabi",
title = "Low test application time resource binding for
behavioral synthesis",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "16:1--16:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230808",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Recent advances in process technology have led to a
rapid increase in the density of integrated circuits
(ICs). Increased density and the need to test for new
types of defects in nanometer technologies have
resulted in a tremendous increase in test application
time (TAT). This article presents a test synthesis
method to reduce test application time for testing the
datapath of a design. The test application time is
reduced by applying a test-time-aware resource sharing
algorithm on a scheduled control data flow graph (CDFG)
of a design.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "CDFG; high-level synthesis; test synthesis;
Testability",
}
@Article{Elshoukry:2007:CPA,
author = "Mohammed Elshoukry and Mohammad Tehranipoor and C. P.
Ravikumar",
title = "A critical-path-aware partial gating approach for test
power reduction",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "17:1--17:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230809",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power reduction during test application is important
from the viewpoint of chip reliability and for
obtaining correct test results. One of the ways to
reduce scan test power is to block transitions
propagating from the outputs of scan cells through
combinational logic. In order to accomplish this, some
researchers have proposed setting primary inputs to
appropriate values or adding extra gates at the outputs
of scan cells. In this article, we point out the
limitations of such full gating techniques in terms of
area overhead and performance degradation. We propose
an alternate solution where a partial set of scan cells
is gated. A subset of scan cells is selected to give
maximum reduction in test power within a given area
constraint. An alternate formulation of the problem is
to treat maximum permitted test power as a constraint
and achieve a test power that is within this limit
using the fewest number of gated scan cells, thereby
leading to the least impact in area overhead. Our
problem formulation also comprehends performance
constraints and prevents the inclusion of gating points
on critical paths. The area overhead is predictable and
closely corresponds to the average power reduction.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Low-power testing; partial gating; scan cell gating;
scan testing",
}
@Article{Pomeranz:2007:FDT,
author = "Irith Pomeranz and Sudhakar M. Reddy",
title = "Forming {N}-detection test sets without test
generation",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "18:1--18:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230810",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We describe a procedure for forming $n$-detection test
sets for $ n > 1$ without applying a test generation
procedure to target faults. The proposed procedure
accepts a one-detection test set. It extracts test
cubes for target faults from the one-detection test
set, and merges the test cubes to obtain new test
vectors. By extracting and merging different test cubes
in different iterations of this process, an
$n$-detection test set is obtained. Merging of test
cubes does not require test generation or fault
simulation. Fault simulation is required for extracting
test cubes for target faults. We demonstrate that the
resulting test set is as effective in detecting
untargeted faults as an $n$-detection test set
generated by a deterministic test generation procedure.
We also discuss the application of the proposed
procedure starting from a random test set (instead of a
one-detection test set).",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "$n$-detection test sets; Bridging faults; stuck-at
faults; test generation",
}
@Article{Fan:2007:ECD,
author = "Hongbing Fan and Jiping Liu and Yu-Liang Wu and
Chak-Chung Cheung",
title = "The exact channel density and compound design for
generic universal switch blocks",
journal = j-TODAES,
volume = "12",
number = "2",
pages = "19:1--19:??",
month = apr,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1230800.1230811",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:08:48 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A switch block of $k$ sides and $W$ terminals on each
side is said to be universal (a $ (k, W)$-USB) if it is
routable for every set of 2-pin nets of channel density
at most $W$. The generic optimum universal switch block
design problem is to design a $ (k, W)$-USB with the
minimum number of switches for every pair of $ (k, W)$.
This problem was first proposed and solved for $ k = 4$
in Chang et al. [1996], and then solved for even $W$ or
for $ k \leq 6$ in Shyu et al. [2000] and Fan et al.
[2002b]. No optimum $ (k, W)$-USB is known for $ k \geq
7$ and odd $ W \geq 3$. But it is already known that
when $W$ is a large odd number, a near-optimum $ (k,
W)$-USB can be obtained by a disjoint union of $ (W -
f_2 (k)) / 2$ copies of the optimum $ (k, 2)$-USB and a
noncompound $ (k, f_2 (k))$-USB, where the value of $
f_2 (k)$ is unknown for $ k \geq 8$. In this article,
we show that $ f_2 (k) = (k + 3 - i) / 3$, where $ 1 \leq
i \leq 6$ and $ i \equiv k \pmod 6$, and present an
explicit design for the noncompound $ (k, f_2
(k))$-USB. Combining these two results we obtain the
exact designs of $ (k, W)$-USBs for all $ k \geq 7$ and
odd $ W \geq 3$. The new $ (k, W)$-USB designs also
yield an efficient detailed routing algorithm.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "FPGA architecture; routing algorithm; universal switch
block",
}
@Article{Lim:2007:ISI,
author = "Sung Kyu Lim and Massoud Pedram",
title = "Introduction to special issue on demonstrable software
systems and hardware platforms",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "20:1--20:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255457",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hsu:2007:ESC,
author = "Chia-Jui Hsu and Ming-Yung Ko and Shuvra S.
Bhattacharyya and Suren Ramasubbu and Jos{\'e} Luis
Pino",
title = "Efficient simulation of critical synchronous dataflow
graphs",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "21:1--21:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255458",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "System-level modeling, simulation, and synthesis using
electronic design automation (EDA) tools are key steps
in the design process for communication and signal
processing systems, and the synchronous dataflow (SDF)
model of computation is widely used in EDA tools for
these purposes. Behavioral representations of modern
wireless communication systems typically result in
critical SDF graphs: These consist of hundreds of
components (or more) and involve complex intercomponent
connections with highly multirate relationships (i.e.,
with large variations in average rates of data transfer
or component execution across different subsystems).
Simulating such systems using conventional SDF
scheduling techniques generally leads to unacceptable
simulation time and memory requirements on modern
workstations and high-end PCs. In this article, we
present a novel simulation-oriented scheduler (SOS)
that strategically integrates several techniques for
graph decomposition and SDF scheduling to provide
effective, joint minimization of time and memory
requirements for simulating critical SDF graphs. We
have implemented SOS in the advanced design system
(ADS) from Agilent Technologies. Our results from this
implementation demonstrate large improvements in
simulating real-world, large-scale, and highly
multirate wireless communication systems (e.g., 3GPP,
Bluetooth, 802.16e, CDMA 2000, XM radio, EDGE, and
Digital TV).",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Scheduling; simulation; synchronous dataflow",
}
@Article{Herrera:2007:FHS,
author = "Fernando Herrera and Eugenio Villar",
title = "A framework for heterogeneous specification and design
of electronic embedded systems in {SystemC}",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "22:1--22:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255459",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This work proposes a methodology which enables
heterogeneous specification of complex, electronic
systems in SystemC supporting the integration of
components under different models of computation
(MoCs). This feature is necessary in order to deal with
the growing complexity, concurrency, and heterogeneity
of electronic embedded systems. The specification
methodology is based on the SystemC standard language.
Nevertheless, the use of SystemC for heterogeneous
system specification is not straightforward. The first
problem to be addressed is the efficient and
predictable mapping of untimed events required by
abstract MoCs over the discrete-event MoC on which the
SystemC simulation kernel is based. This mapping is
essential in order to understand the simulation results
provided by the SystemC model of those MoCs. The
specification methodology proposes the set of rules and
guidelines required by each specific MoC. Moreover, the
methodology supports a smooth integration of several
MoCs in the same system specification. A set of
facilities is provided covering the deficiencies of the
language. These facilities constitute the
methodology-specific library called HetSC. The
methodology and associated library have been
demonstrated to be useful for the specification of
complex, heterogeneous embedded systems supporting
essential design tasks such as performance analysis and
SW generation.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Heterogeneous specification; models of computation;
SystemC",
}
@Article{Lee:2007:CCA,
author = "Hyung Gyu Lee and Naehyuck Chang and Umit Y. Ogras and
Radu Marculescu",
title = "On-chip communication architecture exploration: a
quantitative evaluation of point-to-point, bus, and
network-on-chip approaches",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "23:1--23:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255460",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Traditionally, design-space exploration for
systems-on-chip (SoCs) has focused on the computational
aspects of the problem at hand. However, as the number
of components on a single chip and their performance
continue to increase, a shift from computation-based to
communication-based design becomes mandatory. As a
result, the communication architecture plays a major
role in the area, performance, and energy consumption
of the overall system. This article presents a
comprehensive evaluation of three on-chip communication
architectures targeting multimedia applications.
Specifically, we compare and contrast the
network-on-chip (NoC) with point-to-point (P2P) and
bus-based communication architectures in terms of area,
performance, and energy consumption. As the main
contribution, we present complete P2P, bus-, and
NoC-based implementations of a real multimedia
application (i.e., the MPEG-2 encoder), and provide
direct measurements using an FPGA prototype and actual
video clips, rather than simulation and synthetic
workloads. We also support the experimental findings
through a theoretical analysis. Both experimental and
analysis results show that the NoC architecture scales
very well in terms of area, performance, energy, and
design effort, while the P2P and bus-based
architectures scale poorly on all accounts except for
performance and area, respectively.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "FPGA prototype; MPEG-2 encoder; Networks-on-chip;
point-to-point; system-on-chip",
}
@Article{Ha:2007:PHS,
author = "Soonhoi Ha and Sungchan Kim and Choonseung Lee and
Youngmin Yi and Seongnam Kwon and Young-Pyo Joo",
title = "{PeaCE}: a hardware-software codesign environment for
multimedia embedded systems",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "24:1--24:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255461",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Existent hardware-software (HW-SW) codesign tools
mainly focus on HW-SW cosimulation to build a virtual
prototyping environment that enables software design
and system verification without need of making a
hardware prototype. Not only HW-SW cosimulation, but
also HW-SW codesign methodology involves system
specification, functional simulation, design-space
exploration, and hardware-software cosynthesis. The
PeaCE codesign environment is the first full-fledged
HW-SW codesign environment that provides seamless
codesign flow from functional simulation to system
synthesis. Targeting for multimedia applications with
real-time constraints, PeaCE specifies the system
behavior with a heterogeneous composition of three
models of computation and utilizes features of the
formal models maximally during the whole design
process. It is also a reconfigurable framework in the
sense that third-party design tools can be integrated
to build a customized tool chain. Experiments with
industry-strength examples prove the viability of the
proposed technique.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design-space exploration; embedded systems;
Hardware-software codesign; hardware-software
cosimulation; model-based design",
}
@Article{Atienza:2007:HSE,
author = "David Atienza and Pablo G. {Del Valle} and Giacomo
Paci and Francesco Poletti and Luca Benini and Giovanni
{De Micheli} and Jose M. Mendias and Roman Hermida",
title = "{HW-SW} emulation framework for temperature-aware
design in {MPSoCs}",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "26:1--26:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255463",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "New tendencies envisage multiprocessor
systems-on-chips (MPSoCs) as a promising solution for
the consumer electronics market. MPSoCs are complex to
design, as they must execute multiple applications
(games, video) while meeting additional design
constraints (energy consumption, time-to-market).
Moreover, the rise of temperature in the die for MPSoCs
can seriously affect their final performance and
reliability. In this article, we present a new
hardware-software emulation framework that allows
designers a complete exploration of the thermal
behavior of final MPSoC designs early in the design
flow. The proposed framework uses FPGA emulation as the
key element to model hardware components of the
considered MPSoC platform at multimegahertz speeds. It
automatically extracts detailed system statistics that
are used as input to our software thermal library
running in a host computer. This library calculates at
runtime the temperature of on-chip components, based on
the collected statistics from the emulated system and
final floorplan of the MPSoC. This enables fast testing
of various thermal management techniques. Our results
show speedups of three orders of magnitude compared to
cycle-accurate MPSoC simulators.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "emulation; FPGA; MPSoC; temperature; Thermal-aware
design",
}
@Article{Wu:2007:EPM,
author = "Wei Wu and Lingling Jin and Jun Yang and Pu Liu and
Sheldon X.-D. Tan",
title = "Efficient power modeling and software thermal sensing
for runtime temperature monitoring",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "25:1--25:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255462",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The evolution of microprocessors has been hindered by
increasing power consumption and heat dissipation on
die. An excessive amount of heat creates reliability
problems, reduces the lifetime of a processor, and
elevates the cost of cooling and packaging
considerably. It is therefore imperative to be able to
monitor the temperature variations across the die in a
timely and accurate manner. \par
Most current techniques rely on on-chip thermal sensors
to report the temperature of the processor.
Unfortunately, significant variation in chip
temperature both spatially and temporally exposes the
limitation of the sensors. We present a compensating
approach to tracking chip temperature through an OS
resident software module that generates live power and
thermal profiles of the processor. We developed such a
software thermal sensor (STS) in a Linux system with a
Pentium 4 Northwood core. We employed highly efficient
numerical methods in our model to minimize the overhead
of temperature calculation. We also developed an
efficient algorithm for functional unit power modeling.
Our power and thermal models are calibrated and
validated against on-chip sensor readings, thermal
images of the Northwood heat spreader, and the
thermometer measurements on the package. The resulting
STS offers detailed power and temperature breakdowns of
each functional unit at runtime, enabling more
efficient online power and thermal monitoring and
management at a higher level, such as the operating
system.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Power; thermal",
}
@Article{Huang:2007:ESC,
author = "Po-Kuan Huang and Soheil Ghiasi",
title = "Efficient and scalable compiler-directed energy
optimization for realtime applications",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "27:1--27:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255464",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With continuing shrinkage of technology feature sizes,
the share of leakage in total energy consumption of
digital systems continues to grow. Coordinated supply
voltage and body bias throttling enables the compiler
to better optimize the total energy consumption of the
system in future technology nodes. We present a
compilation technique that targets realtime
applications running on embedded processors with
combined dynamic voltage scaling (DVS) and adaptive
body biasing (ABB) capabilities. Considering the delay
and energy penalty of switching between operating modes
of the processor, our compiler judiciously inserts
mode-switch instructions in selected locations of the
code and generates executable binary that is guaranteed
to meet the deadline constraint. More importantly, our
algorithm runs very fast and comes reasonably close to
the theoretical limit of energy optimization using DVS
+ ABB. At 65nm technology, we improve the energy
dissipation of the generated code by an average of
33.20\% under deadline constraints. While our technique's
improvement in energy dissipation over conventional DVS
is marginal (6.91\%) at 130nm, the average improvement
continues to grow to 13.19\%, 22.97\%, and 33.21\%
for 90nm, 65nm, and 45nm technology nodes,
respectively. Compared to a recent ILP-based
competitor, we improve the runtime by more than three
orders of magnitude, while producing improved
results.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "energy-aware compiler; Leakage; technology scaling",
}
@Article{Shi:2007:CSO,
author = "Yiyu Shi and Paul Mesa and Hao Yu and Lei He",
title = "Circuit-simulated obstacle-aware {Steiner} routing",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "28:1--28:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255465",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article develops circuit-simulated routing
algorithms. We model the routing graph by an RC network
with terminals as inputs, and show that the faster an
output reaches its peak, the higher the possibility for
the corresponding Hanan or escape node to become a
Steiner point. This enables us to select Steiner points
and then apply any minimum spanning tree algorithm to
obtain obstacle-free or obstacle-aware Steiner routing.
Compared with existing algorithms, our algorithms have
significant gain on either wirelength or runtime for
obstacle-free routing, and on both wirelength and
runtime for obstacle-aware routing.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "OARSMT; Routing; RSMT; simulation",
}
@Article{Chakrapani:2007:PSC,
author = "Lakshmi N. Chakrapani and Pinar Korkmaz and Bilge E.
S. Akgul and Krishna V. Palem",
title = "Probabilistic system-on-a-chip architectures",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "29:1--29:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255466",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Parameter variations, noise susceptibility, and
increasing energy dissipation of CMOS devices have been
recognized as major challenges in circuit and
microarchitecture design in the nanometer regime. Among
these, parameter variations and noise susceptibility
are increasingly causing CMOS devices to behave in an
``unreliable'' or ``probabilistic'' manner. To address
these challenges, a shift in design paradigm from
current-day deterministic designs to ``statistical'' or
``probabilistic'' designs is deemed inevitable. To
respond to this need, in this article, we introduce and
study an entirely novel family of probabilistic
architectures: the probabilistic system-on-a-chip
(PSOC). PSOC architectures are based on CMOS devices
rendered probabilistic due to noise, referred to as
probabilistic CMOS or PCMOS devices. We demonstrate
that in addition to harnessing the probabilistic
behavior of PCMOS devices, PSOC architectures yield
significant improvements, both in energy consumed as
well as performance in the context of probabilistic or
randomized applications with broad utility. All of our
application and architectural savings are quantified
using the product of the energy and performance,
denoted (energy $ \times $ performance): The
PCMOS-based gains are as high as a substantial
multiplicative factor of over 560 when compared to a
competing energy-efficient CMOS-based realization. Our
architectural design is application specific and
involves navigating design space spanning the algorithm
(application), its architecture (PSOC), and the
probabilistic technology (PCMOS).",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Embedded systems; probabilistic computing",
}
@Article{Hsieh:2007:FDC,
author = "Ang-Chih Hsieh and Tzu-Teng Lin and Tsuang-Wei Chang
and Tingting Hwang",
title = "A functionality-directed clustering technique for
low-power {MTCMOS} design---computation of
simultaneously discharging current",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "30:1--30:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255467",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Multithreshold CMOS (MTCMOS) is a circuit style that
can effectively reduce leakage power consumption. Sleep
transistor sizing is the key issue when a MTCMOS
circuit is designed. If the size of sleep transistor is
large enough, the circuit performance can surely be
maintained but the area and dynamic power consumption
of the sleep transistor may increase. On the other
hand, if the sleep transistor size is too small, there
will be significant performance degradation because of
the increased resistance to ground. Previous approaches
[Kao et al. 1998; Anis et al. 2002] to designing sleep
transistor size are based mainly on mutually-exclusive
discharge patterns. However, these approaches
considered only the topology of a circuit (i.e.,
interconnections of nodes in the circuit-graph saving
the functionality of node). We observed that any two
possible simultaneously switching gates may not
discharge at the same time in terms of functionality.
Thus, we propose an algorithm to determine how to
cluster cells to share sleep transistors, while taking
both topology and functionality into consideration.
Moreover, one placement refinement algorithm that takes
clustering information into account will be presented.
At the logic level, the results show that the proposed
clustering method can achieve an average of 22\%
reduction in terms of the number of unit-size sleep
transistors as compared to a method that does not
consider functionality. At the physical level, two
placement results are discussed. The first is produced
by a traditional placement tool plus topology check
(functionality check) for insertion of sleep
transistors. It shows that the functionality check
algorithm produces 9\% less chip area as compared with
the topology check algorithm. The second result is
produced by a placement refinement algorithm where the
initial placement is done in the first placement
experiment. It shows that the placement refinement
algorithm achieves 5\% more reduction in area at the
expense of 4\% increase in wire length. Totally, around
14\% reduction is achieved by utilizing the clustering
information.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "DSTN; low power; MTCMOS; sleep transistor",
}
@Article{Dastidar:2007:VST,
author = "Tathagato Rai Dastidar and P. P. Chakrabarti",
title = "A verification system for transient response of analog
circuits",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "31:1--31:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255468",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present a method for application of formal
techniques like model checking and equivalence checking
for validation of the transient response of nonlinear
analog circuits. We propose a temporal logic called Ana
CTL (computational tree logic for analog circuit
verification) which is suitable for specifying
properties specific to analog circuits. The application
of Ana CTL for validation of transient behavior of
arbitrarily nonlinear analog circuits is presented. The
transient response of a circuit under all possible
input waveforms is represented as a finite state
machine (FSM), by bounding and discretizing the
continuous state space of an analog circuit. We have
developed algorithms to run Ana CTL queries on this
discretized model using search-based methods which
reduce the runtime considerably by avoiding creation of
the whole FSM. The application of these methods on
several real-life analog circuits is presented and we
show that this system is a useful aid for detecting and
debugging early design errors. \par
We also present methods for checking the equivalence of
transient response of two analog circuits. The behavior
of two different analog circuits can rarely be exactly
similar. Hence, we introduce a notion of approximate
equivalence. A query language for checking different
notions of user-definable approximate equivalence is
presented which extends the syntax of the Ana CTL model
checking language. In its extended form, Ana CTL can be
used combining model checking with equivalence
checking.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Ana CTL; Analog circuits; equivalence checking; model
checking; query language; transient response",
}
@Article{Chang:2007:PRE,
author = "Kai-Hui Chang and Igor L. Markov and Valeria
Bertacco",
title = "Postplacement rewiring by exhaustive search for
functional symmetries",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "32:1--32:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255469",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We propose two new algorithms for rewiring: a
postplacement optimization that reconnects pins of a
given netlist without changing the logic function and
gate locations. In the first algorithm, we extract
small subcircuits consisting of several gates from the
design and reconnect pins according to the symmetries
of the subcircuits. To enhance the power of symmetry
detection, we also propose a graph-based symmetry
detector that can identify permutational and
phase-shift symmetries on multiple input and output
wires, as well as hybrid symmetries, creating abundant
opportunities for rewiring. Our second algorithm,
called long-range rewiring, is based on reconnecting
equivalent pins and can augment the first approach for
further optimization. We apply our techniques for
wirelength optimization and observe that they provide
wirelength reduction comparable to that achieved by
detailed placement.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "placement; rewiring; VLSI",
}
@Article{Mathaikutty:2007:EMD,
author = "Deepak Mathaikutty and Hiren Patel and Sandeep Shukla
and Axel Jantsch",
title = "{EWD}: a metamodeling driven customizable multi-{MoC}
system modeling framework",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "33:1--33:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255470",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present the EWD design environment and methodology,
a modeling and simulation framework suited for complex
and heterogeneous embedded systems with varying degrees
of expressibility and modeling fidelity. This
environment promotes the use of multiple models of
computation (MoCs) to support heterogeneity and
metamodeling for conformance tests of syntactic and
static semantics during the process of modeling.
Therefore, EWD is a multiple MoC modeling and
simulation framework that ensures conformance of the
MoC formalisms during model construction using a
metamodeling approach. In addition, EWD provides a
suite of translation tools that generate executable
models for two simulation frameworks to demonstrate its
language-independent modeling framework. The EWD
methodology uses the Generic Modeling Environment for
customization of the MoC-specific modeling syntax into
a visual representation. To embed the execution
semantics of the MoCs into the models, we have built
parsing and translation tools that leverage an
XML-based interoperability language. This
interoperability language is then translated into
executable Standard ML or Haskell models that can also
be analyzed by existing simulation frameworks such as
SML-Sys or ForSyDe. In summary, EWD is a metamodeling
driven multitarget design environment with multi-MoC
modeling capability.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "denotational semantics; ForSyDe; functional language;
heterogeneous system design; interoperable modeling
language; metamodel; Metamodeling; MoC; Ptolemy II;
SystemC",
}
@Article{Stitt:2007:BS,
author = "Greg Stitt and Frank Vahid",
title = "Binary synthesis",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "34:1--34:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255471",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Recent high-level synthesis approaches and C-based
hardware description languages attempt to improve the
hardware design process by allowing developers to
capture desired hardware functionality in a well-known
high-level source language. However, these approaches
have yet to achieve wide commercial success due in part
to the difficulty of incorporating such approaches into
software tool flows. The requirement of using a
specific language, compiler, or development environment
may cause many software developers to resist such
approaches due to the difficulty and possible
instability of changing well-established robust tool
flows. Thus, in the past several years, synthesis from
binaries has been introduced, both in research and in
commercial tools, as a means of better integrating with
tool flows by supporting all high-level languages and
software compilers. Binary synthesis can be more easily
integrated into a software development tool-flow by
only requiring an additional backend tool, and it even
enables completely transparent dynamic translation of
executing binaries to configurable hardware circuits.
In this article, we survey the key technologies
underlying the important emerging field of binary
synthesis. We compare binary synthesis to several
related areas of research, and we then describe the key
technologies required for effective binary synthesis:
decompilation techniques necessary for binary synthesis
to achieve results competitive with source-level
synthesis, hardware/software partitioning methods
necessary to find critical binary regions suitable for
synthesis, synthesis methods for converting regions to
custom circuits, and binary update methods that enable
replacement of critical binary regions by circuits.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Binary synthesis; configurable logic; FPGA;
hardware/software codesign; hardware/software
partitioning; synthesis from software binaries; warp
processors",
}
@Article{Galanis:2007:SES,
author = "Michalis D. Galanis and Gregory Dimitroulakos and
Spyros Tragoudas and Costas E. Goutis",
title = "Speedups in embedded systems with a high-performance
coprocessor datapath",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "35:1--35:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255472",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents the speedups achieved in a
generic single-chip microprocessor system by employing
a high-performance datapath. The datapath acts as a
coprocessor that accelerates computational-intensive
kernel sections thereby increasing the overall
performance. We have previously introduced the datapath
which is composed of Flexible Computational Components
(FCCs). These components can realize any two-level
template of primitive operations. The automated
coprocessor synthesis method from high-level software
description and its integration to a design flow for
executing applications on the system is presented. For
evaluating the effectiveness of our coprocessor
approach, analytical study in respect to the type of
the custom datapath and to the microprocessor
architecture is performed. The overall application
speedups of several real-life applications relative to
the software execution on the microprocessor are
estimated using the design flow. These speedups range
from 1.75 to 5.84, with an average value of 3.04,
while the overhead in circuit area is small. The design
flow achieved the acceleration of the applications near
to theoretical speedup bounds. A comparison with
another high-performance datapath showed that the
proposed coprocessor achieves smaller area-time
products by an average of 23\% for the generated
datapaths. Additionally, the FCC coprocessor achieves
better performance in accelerating kernels relative to
software-programmable DSP cores.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "chaining; coprocessor datapath; design flow; kernels;
Performance improvements; synthesis",
}
@Article{Roy:2007:EPA,
author = "Suchismita Roy and P. P. Chakrabarti and Pallab
Dasgupta",
title = "Event propagation for accurate circuit delay
calculation using {SAT}",
journal = j-TODAES,
volume = "12",
number = "3",
pages = "36:1--36:??",
month = aug,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1255456.1255473",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:12 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A SAT-based modeling for event propagation in
gate-level digital circuits, which is used for accurate
calculation of critical delay in combinational and
sequential circuits, is presented in this article. The
accuracy of the critical delay estimation process
depends on the accuracy with which the circuit in
operation is modeled. A high level of precision in the
modeling of the internal events in a circuit for the
sake of greater accuracy causes a combinatorial blowup
in the size of the problem, resulting in a scalability
bottleneck for which most existing techniques effect a
trade-off by restricting themselves to less precise
models. SAT based techniques have a good track record
in efficiency and scalability when the problem sizes
become too large for most other methods. This article
proposes a SAT-based technique for symbolic event
propagation within a circuit which facilitates the
estimation of the critical delay of circuits with a
greater degree of accuracy, while at the same time
scaling efficiently to large circuits. We report very
encouraging results on the ISCAS85 and ISCAS89
benchmark circuits using the proposed technique.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Critical delay; event propagation; SAT",
}
@Article{Yuh:2007:TFU,
author = "Ping-Hung Yuh and Chia-Lin Yang and Yao-Wen Chang",
title = "Temporal floorplanning using the three-dimensional
transitive closure {subGraph}",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "37:1--37:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278350",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Improving logic capacity by time-sharing, dynamically
reconfigurable Field Programmable Gate Arrays (FPGAs)
are employed to handle designs of high complexity and
functionality. In this paper, we use a novel
graph-based topological floorplan representation, named
3D-subTCG (3-Dimensional Transitive Closure subGraph),
to deal with the 3-dimensional (temporal)
floorplanning/placement problem, arising from
dynamically reconfigurable FPGAs. The 3D-subTCG uses
three transitive closure graphs to model the temporal
and spatial relations between modules. We derive the
feasibility conditions for the precedence constraints
induced by the execution of the dynamically
reconfigurable FPGAs. Because the geometric
relationship is transparent to the 3D-subTCG and its
induced operations (i.e., we can directly detect the
relationship between any two tasks from the
representation), we can easily detect any violation of
the temporal precedence constraints on 3D-subTCG. We
also derive important properties of the 3D-subTCG to
reduce the solution space and shorten the running time
for 3D (temporal) floorplanning/placement. Experimental
results show that our 3D-subTCG-based algorithm is very
effective and efficient.",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "partially dynamical reconfiguration; Reconfigurable
computing; temporal floorplanning",
}
@Article{Liu:2007:IEM,
author = "Jinfeng Liu and Pai H. Chou",
title = "Idle energy minimization by mode sequence
optimization",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "38:1--38:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278351",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents techniques for reducing idle
energy by mode-sequence optimization (MSO) under timing
constraints. Our component-level CoMSO algorithm
computes energy-optimal mode-transition sequences for
different lengths of idle intervals. Our system-level
SyMSO algorithm shifts tasks within slack intervals
while satisfying all timing and resource constraints in
the given schedule. Experimental results on a
commercial software-defined radio show that these new
techniques can reduce idle energy by 50--70\%, or
30--50\% of total system energy over previous
offline-optimal but unsequenced techniques based on
localized break-even-time analysis, thanks to rich
options offered by mode sequencing.",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "communication speed selection;
communication/computation trade-offs; embedded
multi-processor; Functional partitioning; low-power
design",
}
@Article{Gorjiara:2007:UFE,
author = "Bita Gorjiara and Nader Bagherzadeh and Pai H. Chou",
title = "Ultra-fast and efficient algorithm for energy
optimization by gradient-based stochastic voltage and
task scheduling",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "39:1--39:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278352",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This paper presents a new technique, called Adaptive
Stochastic Gradient Voltage-and-Task Scheduling
(ASG-VTS), for power optimization of multicore hard
realtime systems. ASG-VTS combines stochastic and
energy-gradient techniques to simultaneously solve the
slack distribution and task reordering problem. It
produces very efficient results with few mode
transitions. Our experiments show that ASG-VTS reduces
number of mode transitions by 4.8 times compared to
traditional energy-gradient-based approaches. Also, our
heuristic algorithm can quickly find a solution that is
as good as the optimal for a real-life GSM
encoder/decoder benchmark. The runtime of ASG-VTS is
150 times and 1034 times faster than energy-gradient
based and optimal ILP algorithms, respectively. Since
the runtime of ASG-VTS is very low, it is ideal for
design space exploration in system-level design tools.
We have also developed a web-based interface for
ASG-VTS algorithm.",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Power management; slack distribution; voltage and task
scheduling",
}
@Article{Vanbroekhoven:2007:PDS,
author = "Peter Vanbroekhoven and Gerda Janssens and Maurice
Bruynooghe and Francky Catthoor",
title = "A practical dynamic single assignment transformation",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "40:1--40:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278353",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This paper presents a novel method to construct a
dynamic single assignment (DSA) form of array
intensive, pointer free C programs. A program in DSA
form does not perform any destructive update of scalars
and array elements; that is, each element is written at
most once. As DSA makes the dependencies between
variable references explicit, it facilitates complex
analyses and optimizations of programs. Existing
transformations into DSA perform a complex data flow
analysis with exponential analysis time, and they work
only for a limited class of input programs. Our method
removes irregularities from the data flow by adding
copy assignments to the program, so that it can use
simple data flow analyses. The presented DSA
transformation scales very well with growing program
sizes and overcomes a number of important limitations
of existing methods. We have implemented the method and
it is being used in the context of memory optimization
and verification of those optimizations. Experiments
show that in practice, the method scales well indeed,
and that added copy operations can be removed in case
they are unwanted.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "arrays; Data flow analysis; parallelization; reaching
definitions; single assignment",
}
@Article{Kobayashi:2007:MOS,
author = "Yuki Kobayashi and Murali Jayapala and Praveen
Raghavan and Francky Catthoor and Masaharu Imai",
title = "Methodology for operation shuffling and {L0} cluster
generation for low energy heterogeneous {VLIW}
processors",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "41:1--41:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278354",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Clustering L0 buffers is effective for energy
reduction in the instruction memory hierarchy of
embedded VLIW processors. However, the efficiency of
the clustering depends on the schedule of the target
application. Especially in heterogeneous or data
clustered VLIW processors, determining energy efficient
scheduling is more constraining. \par
This article proposes a realistic technique supported
by a tool flow to explore operation shuffling for
improving generation of L0 clusters. The tool flow
explores assignment of operations for each cycle and
generates various schedules. This approach makes it
possible to reduce energy consumption for various
processor architectures. However, the computational
complexity is large because of the huge exploration
space. Therefore, some heuristics are also developed,
which reduce the size of the exploration space while
the solution quality remains reasonable. Furthermore,
we also propose a technique to support VLIW processors
with multiple data clusters, which is essential to
apply the methodology to real world processors.
\par
The experimental results indicate potential gains of up
to 27.6\% in energy in L0 buffers, through operation
shuffling for heterogeneous processor architectures as
well as a homogeneous architecture. Furthermore, the
proposed heuristics drastically reduce the exploration
search space by about 90\%, while the results are
comparable to full search, with average differences of
less than 1\%. The experimental results indicate that
energy efficiency can be improved in most of the media
benchmarks by the proposed methodology, where the
average gain is around 10\% in comparison with
generating clusters without operation shuffling.",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Compilers for low energy; loop buffers; VLIW
processors",
}
@Article{Maslov:2007:TSR,
author = "D. Maslov and G. W. Dueck and D. M. Miller",
title = "Techniques for the synthesis of reversible {Toffoli}
networks",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "42:1--42:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278355",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present certain new techniques for the synthesis of
reversible networks of Toffoli gates, as well as
improvements to previous methods. Gate count and
technology oriented cost metrics are used. Two new
synthesis procedures employing Reed--Muller spectra are
introduced and shown to complement earlier synthesis
approaches. The previously proposed template
simplification method is enhanced through the
introduction of a faster and more efficient template
application algorithm, an updated classification of the
templates, and the addition of new templates of sizes 7
and 9. A resynthesis approach is introduced wherein a
sequence of gates is chosen from a network, and the
reversible specification it realizes is resynthesized
as an independent problem in hopes of reducing the
network cost. Empirical results are presented to show
that the methods are efficient in terms of the
realization of reversible benchmark specifications.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "circuit optimization; quantum computing; reversible
logic synthesis",
}
@Article{Bouchebaba:2007:MMO,
author = "Youcef Bouchebaba and Bruno Girodias and Gabriela
Nicolescu and El Mostapha Aboulhamid and Bruno
Lavigueur and Pierre Paulin",
title = "{MPSoC} memory optimization using program
transformation",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "43:1--43:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278356",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Multiprocessor system-on-a-chip (MPSoC) architectures
have received a lot of attention in the past years, but
few advances in compilation techniques target these
architectures. This is particularly true for the
exploitation of data locality. Most of the compilation
techniques for parallel architectures discussed in the
literature are based on a single loop nest. This
article presents new techniques that consist in
applying loop fusion and tiling to several loop nests
and to parallelize the resulting code across different
processors. These two techniques reduce the number of
memory accesses. However, they increase dependencies
and thereby reduce the exploitable parallelism in the
code. This article tries to address this contradiction.
To optimize the memory space used by temporary arrays,
smaller buffers are used as a replacement. Different
strategies are studied to optimize the processing time
spent accessing these buffers. The experiments show
that these techniques yield a significant reduction in
the number of data cache misses (30\%) and in
processing time (50\%).",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "compiler transformations; data cache; Data locality;
embedded systems",
}
@Article{Das:2007:FVT,
author = "Dipankar Das and P. P. Chakrabarti and Rajeev Kumar",
title = "Functional verification of task partitioning for
multiprocessor embedded systems",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "44:1--44:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278357",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the advent of multiprocessor embedded platforms,
application partitioning and mapping have gained
primacy as a design step. The output of this design
step is a multithreaded partitioned application where
each thread is mapped to a processing element
(processor or ASIC) in the multiprocessor platform.
This partitioned application must be verified to be
consistent with the native unpartitioned application.
This verification task is called application (or task)
partitioning verification. \par
This work proposes a code-block-level
containment-checking-based methodology for application
partitioning verification. We use a UML-based
code-block-level modeling language which is rich enough
to model most designs. We formulate the application
partitioning verification problem as a special case of
the containment checking problem, which we call the
complete containment checking problem. We propose a
state space reduction technique specific to the
containment checking, reachability analysis, and
deadlock detection problems. We propose novel data
structures and token propagation methodologies which
enhance the efficiency of containment checking. We
present an efficient containment checking algorithm for
the application partitioning verification problem. We
develop a containment checking tool called TraceMatch
and present experimental results. We present a
comparison of the state space reduction achieved by
TraceMatch with that achieved by formal analysis and
verification tools like Spin, PEP, PROD, and LoLA.",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Containment checking; multiprocessor embedded systems;
state space reduction; UML activity diagrams",
}
@Article{Huang:2007:CSS,
author = "Shih-Hsu Huang and Yow-Tyng Nieh",
title = "Clock skew scheduling with race conditions
considered",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "45:1--45:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278358",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we provide a fresh viewpoint to the
interactions between clock skew scheduling and delay
insertion. A race-condition-aware (RCA) clock skew
scheduling is proposed to determine the clock skew
schedule by taking race conditions (i.e., hold
violations) into account. Our objective is not only to
optimize the clock period, but also to minimize
heuristically the required inserted delay. Compared
with previous work, our major contribution includes the
following two aspects. First, our approach achieves
exactly the same results, but has significant
improvement in time complexity. Second, our viewpoint
can be generalized to other sequential timing
optimization techniques.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "logic synthesis; performance optimization; Sequential
circuits; timing optimization",
}
@Article{Wang:2007:ETR,
author = "Gang Wang and Wenrui Gong and Brian Derenzi and Ryan
Kastner",
title = "Exploring time\slash resource trade-offs by solving
dual scheduling problems with the ant colony
optimization",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "46:1--46:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278359",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Design space exploration during high-level synthesis
is often conducted through ad hoc probing of the
solution space using some scheduling algorithm. This is
not only time consuming but also very dependent on
designer's experience. We propose a novel design
exploration method that exploits the duality of time-
and resource-constrained scheduling problems. Our
exploration automatically constructs a time/area
tradeoff curve in a fast, effective manner. It is a
general approach and can be combined with any
high-quality scheduling algorithm. In our work, we use
the max-min ant colony optimization technique to solve
both time- and resource-constrained scheduling
problems. Our algorithm provides significant
solution-quality savings (average 17.3\% reduction of
resource counts) with similar runtime compared to using
force-directed scheduling exhaustively at every time
step. It also scales well across a comprehensive
benchmark suite constructed with classic and real-life
samples.",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "ant colony optimization; Design space exploration;
instruction scheduling; max-min ant system",
}
@Article{Ghosh:2007:LPT,
author = "Swaroop Ghosh and Swarup Bhunia and Kaushik Roy",
title = "Low-Power and Testable Circuit Synthesis Using
{Shannon} Decomposition",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "47:1--47:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278360",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:34:15 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/bibnet/authors/s/shannon-claude-elwood.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Structural transformation of a design to enhance its
testability while satisfying design constraints on
power and performance can result in improved test cost
and test confidence. In this article, we analyze the
testability in a new style of logic design based on
Shannon's decomposition and supply gating. We observe
that the tree structure of a logic circuit due to
Shannon's decomposition makes it intrinsically more
testable than a conventionally synthesized circuit,
while at the same time providing an improvement in
active power. We have analyzed four different aspects
of the testability of a circuit: (a) IDDQ test
sensitivity, (b) test power during scan-based testing,
(c) test length (for both ATPG-generated deterministic
and random patterns), and (d) noise immunity.
Simulation results on a set of MCNC benchmarks show
promising results on all these aspects (an average
improvement of 94\% in IDDQ sensitivity, 50\% in test
power, 19\% (21\%) in test length for deterministic
(random) patterns, and 50\% in coupling noise
immunity). We have also demonstrated that the new logic
structure can improve parametric yield (6\% on average)
of a circuit under process variations when considering
a bound on circuit leakage.",
acknowledgement = ack-nhfb,
acmid = "1278360",
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Design-for-test; dynamic supply gating; IDDQ; noise
immunity; Shannon expansion; test coverage; test
power",
subject-dates = "Claude Elwood Shannon (1916--2001)",
}
@Article{Ostler:2007:IHT,
author = "Chris Ostler and Karam S. Chatha and Vijay Ramamurthi
and Krishnan Srinivasan",
title = "{ILP} and heuristic techniques for system-level design
on network processor architectures",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "48:1--48:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278361",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Network processors incorporate several architectural
features, including symmetric multiprocessing (SMP),
block multithreading, and multiple memory elements, to
support the high-performance requirements of current
day applications. This article presents automated
system-level design techniques for application
development on such architectures. We propose integer
linear programming formulations and heuristic
techniques for process allocation and data mapping on
SMP and block-multithreading-based network processors.
The techniques incorporate process transformations and
multithreading-aware data mapping to maximize the
throughput of the application. The article presents
experimental results that evaluate the techniques by
implementing network processing applications on the
Intel IXP 2400 architecture.",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "block multithreading; multiprocessor",
}
@Article{Gopalakrishnan:2007:OPD,
author = "Sivaram Gopalakrishnan and Priyank Kalla",
title = "Optimization of polynomial datapaths using finite ring
algebra",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "49:1--49:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278362",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents an approach to area optimization
of arithmetic datapaths at register-transfer level
(RTL). The focus is on those designs that perform
polynomial computations (add, mult) over finite
word-length operands (bit-vectors). We model such
polynomial computations over $m$-bit vectors as algebra
over finite integer rings of residue classes $ Z_{2^m} $.
Subsequently, we use the number-theoretic and algebraic
properties of such rings to transform a given datapath
computation into another, bit-true equivalent
computation. We also derive a cost model to estimate,
at RTL, the area cost of the computation. Using the
transformation procedure along with the cost model, we
devise algorithmic procedures to search for a
lower-cost implementation. We show how these
theoretical concepts can be applied to RTL optimization
of arithmetic datapaths within practical CAD settings.
Experiments conducted over a variety of benchmarks
demonstrate substantial optimizations using our
approach.",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "arithmetic datapaths; finite ring algebra; High-level
synthesis; modulo arithmetic; polynomial datapaths",
}
@Article{Hu:2007:IHM,
author = "Q. Hu and P. G. Kjeldsberg and A. Vandecappelle and M.
Palkovic and F. Catthoor",
title = "Incremental hierarchical memory size estimation for
steering of loop transformations",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "50:1--50:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278363",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Modern embedded multimedia and telecommunications
systems need to store and access huge amounts of data.
This becomes a critical factor for the overall energy
consumption, area, and performance of the systems. Loop
transformations are essential to improve the data
access locality and regularity in order to optimally
design or utilize a memory hierarchy. However, due to
abstract high-level cost functions, current loop
transformation steering techniques do not take the
memory platform sufficiently into account. They usually
also result in only one final transformation solution.
On the other hand, the loop transformation search space
for real-life applications is huge, especially if the
memory platform is still not fully fixed. Use of
existing loop transformation techniques will therefore
typically lead to suboptimal end-products. It is
critical to find all interesting loop transformation
instances. This can only be achieved by performing an
evaluation of the effect of later design stages at the
early loop transformation stage. \par
This article presents a fast incremental hierarchical
memory-size requirement estimation technique. It
estimates the influence of any given sequence of loop
transformation instances on the mapping of application
data onto a hierarchical memory platform. As the exact
memory platform instantiation is often not yet defined
at this high-level design stage, a platform-independent
estimation is introduced with a Pareto curve output for
each loop transformation instance. Comparison among the
Pareto curves helps the designer, or a steering tool,
to find all interesting loop transformation instances
that might later lead to low-power data mapping for any
of the many possible memory hierarchy instances.
Initially, the source code is used as input for
estimation. However, performing the estimation
repeatedly from the source code is too slow for large
search space exploration. An incremental approach,
based on local updating of the previous result, is
therefore used to handle sequences of different loop
transformations. Experiments show that the initial
approach takes a few seconds, which is two orders of
magnitude faster than state-of-the-art solutions but
still too costly to be performed interactively many
times. The incremental approach typically takes just a
few milliseconds, which is another two orders of
magnitude faster than the initial approach. This huge
speedup allows us for the first time to handle
real-life industrial-size applications and get
realistic feedback during loop transformation
exploration.",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "code transformation; Data optimization; high-level
synthesis; memory architecture exploration; memory size
estimation",
}
@Article{You:2007:CCP,
author = "Yi-Ping You and Chung-Wen Huang and Jenq Kuen Lee",
title = "Compilation for compact power-gating controls",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "51:1--51:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278364",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power leakage constitutes an increasing fraction of
the total power consumption in modern semiconductor
technologies due to the continuing size reductions and
increasing speeds of transistors. Recent studies have
attempted to reduce leakage power using integrated
architecture and compiler power-gating mechanisms. This
approach involves compilers inserting instructions into
programs to shut down and wake up components, as
appropriate. While early studies showed this approach
to be effective, there are concerns about the large
amount of power-control instructions being added to
programs due to the increasing amount of components
equipped with power-gating controls in SoC design
platforms. In this article we present a sink-n-hoist
framework for a compiler to generate balanced
scheduling of power-gating instructions. Our solution
attempts to merge several power-gating instructions
into a single compound instruction, thereby reducing
the amount of power-gating instructions issued. We
performed experiments by incorporating our compiler
analysis and scheduling policies into SUIF compiler
tools and by simulating the energy consumption using
Wattch toolkits. The experimental results demonstrate
that our mechanisms are effective in reducing the
amount of power-gating instructions while further
reducing leakage power compared to previous methods.",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "balanced scheduling; Compilers for low power;
data-flow analysis; leakage-power reduction;
power-gating mechanisms",
}
@Article{Chen:2007:NMA,
author = "Gang Chen and Xiaoyu Song and Feng Liu and Qingping
Tan and Fei He",
title = "A note on {``A mapping algorithm for computer-assisted
exploration in the design of embedded systems''}",
journal = j-TODAES,
volume = "12",
number = "4",
pages = "52:1--52:??",
month = sep,
year = "2007",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1278349.1278365",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:09:35 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
note = "See \cite{Mariatos:2001:MAC}.",
acknowledgement = ack-nhfb,
articleno = "52",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dutt:2008:Ea,
author = "Nikil Dutt",
title = "Editorial",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297667",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hsiao:2008:ISS,
author = "Michael S. Hsiao and Robert B. Jones",
title = "Introduction to special section on high-level design,
validation, and test",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297668",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cabodi:2008:BID,
author = "Gianpiero Cabodi and Marco Murciano and Sergio Nocco
and Stefano Quer",
title = "Boosting interpolation with dynamic localized
abstraction and redundancy removal",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297669",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "SAT-based Unbounded Model Checking based on Craig
Interpolants is often able to overcome BDDs and other
SAT-based techniques on large verification instances.
Based on refutation proofs generated by SAT solvers,
interpolants provide compact circuit representations of
state sets, as they abstract away several nonrelevant
details of the proofs. We propose three main
contributions, aimed at controlling interpolant size
and traversal depth. First of all, we introduce
interpolant-based dynamic abstraction to reduce the
support of computed interpolants. Subsequently, we
propose new advances in interpolant compaction by
redundancy removal. Finally, we introduce interpolant
computation exploiting circuit quantification, instead
of SAT refutation proofs. These techniques heavily rely
on an effective application of the incremental SAT
paradigm. The experimental results proposed in this
paper are specifically oriented to prove properties,
rather than disproving them, i.e., they target complete
verification instead of simply hunting bugs. They show
how this methodology is able to stretch the
applicability of interpolant-based Model Checking to
larger and deeper verification instances.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "abstraction; Interpolant; redundancy removal",
}
@Article{Boule:2008:ABA,
author = "Marc Boul{\'e} and Zeljko Zilic",
title = "Automata-based assertion-checker synthesis of {PSL}
properties",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297670",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Assertion-based verification with languages such as
PSL is gaining in importance. From assertions, one can
generate hardware assertion checkers for use in
emulation, simulation acceleration and silicon debug.
We present techniques for checker generation of the
complete set of PSL properties, including all variants
of operators, both strong and weak. A full
automata-based approach allows an entire assertion to
be represented by a single automaton, hence allowing
optimizations that can not be done in a modular
approach where subcircuits are created only for
individual operators. For this purpose, automata
algorithms are developed for the base cases, and a
complete set of rewrite rules is derived for other
operators. Automata splitting is introduced for an
efficient implementation of the {\tt eventually!} operator.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "assertion checkers; Assertion-Based Verification;
automata; emulation; hardware; PSL",
}
@Article{Rahaman:2008:CTB,
author = "H. Rahaman and J. Mathew and D. K. Pradhan and A. M.
Jabir",
title = "{C}-testable bit parallel multipliers over {$ {\rm
GF}(2^m) $}",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297671",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present a C-testable design of polynomial basis
(PB) bit-parallel (BP) multipliers over $ {\rm GF}(2^m) $ for
100\% coverage of stuck-at faults. Our design method
also includes the method for test vector generation,
which is simple and efficient. C-testability is
achieved with three control inputs and approximately
6\% additional hardware. Only 8 constant vectors are
required irrespective of the sizes of the fields and
primitive polynomial. We also present a Built-In
Self-Test (BIST) architecture for generating the test
vectors efficiently, which eliminates the need for the
extra control inputs. Since these circuits have
critical applications as parts of cryptography (e.g.,
Elliptic Curve Crypto (ECC) systems) hardware, the BIST
architecture may provide with added level of security,
as the tests would be done internally and without the
requirement of probing by external testing equipment.
Finally we present experimental results comprising the
area, delay and power of the testable multipliers of
various sizes with the help of the Synopsys{\reg} tools
using UMC 0.18 micron CMOS technology library.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "built-in self-test; C-testable; cryptography; digital
signal processing; error control code; fault; Galois
field; multiplier; polynomials; stuck-at fault;
testing; TPG; VLSI design",
}
@Article{Taktak:2008:TAD,
author = "Sami Taktak and Jean-Lou Desbarbieux and Emmanuelle
Encrenaz",
title = "A tool for automatic detection of deadlock in wormhole
networks on chip",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297672",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present an extension of Duato's necessary and
sufficient condition a routing function must satisfy in
order to be deadlock-free, to support environment
constraints inducing extra-dependencies between
messages. We also present an original algorithm to
automatically check the deadlock-freeness of a network
with a given routing function. A prototype tool has
been developed and automatic deadlock checking of large
scale networks with various routing functions have been
successfully achieved. We provide comparative results
with standard approach, highlighting the benefits of
our method.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Deadlock; interconnection networks; networks on chip;
wormhole routing",
}
@Article{Zhou:2008:NER,
author = "Hai Zhou",
title = "A new efficient retiming algorithm derived by formal
manipulation",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297673",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A new efficient algorithm is derived for the minimal
period retiming by formal manipulation. Contrary to all
previous algorithms, which used fixed period
feasibility checking to binary-search a candidate
range, the derived algorithm checks the optimality of a
feasible period directly. It is much simpler and more
efficient than previous algorithms. Experimental
results showed that it is even faster than ASTRA, an
efficient heuristic algorithm. Since the derived
algorithm is incremental by nature, it also opens the
opportunity to be combined with other optimization
techniques.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "algorithm derivation; Clock period minimization;
retiming",
}
@Article{Krishnaswamy:2008:PTM,
author = "Smita Krishnaswamy and George F. Viamontes and Igor L.
Markov and John P. Hayes",
title = "Probabilistic transfer matrices in symbolic
reliability analysis of logic circuits",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297674",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We propose the probabilistic transfer matrix (PTM)
framework to capture nondeterministic behavior in logic
circuits. PTMs provide a concise description of both
normal and faulty behavior, and are well-suited to
reliability and error susceptibility calculations. A
few simple composition rules based on connectivity can
be used to recursively build larger PTMs (representing
entire logic circuits) from smaller gate PTMs. PTMs for
gates in series are combined using matrix
multiplication, and PTMs for gates in parallel are
combined using the tensor product operation. PTMs can
accurately calculate joint output probabilities in the
presence of reconvergent fanout and inseparable joint
input distributions. To improve computational
efficiency, we encode PTMs as algebraic decision
diagrams (ADDs). We also develop equivalent ADD
algorithms for newly defined matrix operations such as
{\tt eliminate\_variables} and {\tt
eliminate\_redundant\_variables}, which aid in the
numerical computation of circuit PTMs. We use PTMs to
evaluate circuit reliability and derive polynomial
approximations for circuit error probabilities in terms
of gate error probabilities. PTMs can also analyze the
effects of logic and electrical masking on error
mitigation. We show that ignoring logic masking can
overestimate errors by an order of magnitude. We
incorporate electrical masking by computing error
attenuation probabilities, based on analytical models,
into an extended PTM framework for reliability
computation. We further define a susceptibility measure
to identify gates whose errors are not well masked. We
show that hardening a few gates can significantly
improve circuit reliability.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "fault tolerance; Symbolic analysis",
}
@Article{Tzeng:2008:VPS,
author = "Chao-Wen Tzeng and Jheng-Syun Yang and Shi-Yu Huang",
title = "A versatile paradigm for scan chain diagnosis of
complex faults using signal processing techniques",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297675",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Scan chains are popularly used as the channels for
silicon testing and debugging. However, they have also
been identified as one of the culprits of silicon
failure more recently. To cope with this problem,
several scan chain diagnosis approaches have been
proposed in the past. The existing methods, however,
suffer from one common drawback---that is, they rely on
fault models and matching heuristics to locate the
faults. Such a paradigm may run into difficulty when
the fault under diagnosis does not match the fault
model exactly, for example, when there is a bridging
between a flip-flop and a logic cell, or the fault is
temporal and only manifests itself intermittently. In
light of this, we propose in this article a more
versatile model-free paradigm for locating the faulty
flip-flops in a scan chain, incorporating a number of
signal processing techniques, such as filtering and
edge detection. These techniques performed on the test
responses of the failing chip under diagnosis directly
can effectively reveal the fault location(s) in a scan
chain. As compared to the previous works, our approach
is better capable of handling intermittent faults and
bridging faults, even under nonideal conditions, for
example, when the core logic is also faulty.
Experimental results on several real designs indicate
that this approach can indeed catch some nasty faults
that previous methods could not catch.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design for testability; Diagnosis; fault; profiling;
scan chain",
}
@Article{Johnson:2008:IME,
author = "F. Ryan Johnson and Joann M. Paul",
title = "Interrupt modeling for efficient high-level scheduler
design space exploration",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "10:1--10:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297676",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Single Chip Heterogeneous Multiprocessors executing a
wide variety of software are increasingly common in
consumer electronics. Because of the mix of real-time
and best effort software across the entire chip, a key
design element of these systems is the choice of
scheduling strategy. Without task migration, the
benefits of single chip processing cannot be fully
realized. Previously, high-level modeling environments
have not been capable of modeling asynchronous events
such as interrupts and preemptive scheduling while
preserving the performance benefits of high level
simulation. This paper shows how extensions to Modeling
Environment for Software and Hardware (MESH) enable
precise modeling of these asynchronous events while
running more than 1000 times faster than cycle-accurate
simulation. We discuss how we achieved this and
illustrate its use in modeling preemptive scheduling.
We evaluate the potential of migrating running tasks
between processors to improve performance in a
multimedia cell phone example. We show that by allowing
schedulers to rebalance processor loads as new tasks
arrive significant performance gains can be achieved
over statically partitioned and dynamic scheduling
approaches. In our example, we show that system
response time can be improved by as much as 1.96 times
when a preemptive migratory scheduler is used, despite
the overhead incurred by scheduling tasks across
multiple processors and transferring state during the
migration of running tasks. The contribution of this
work is to provide a framework for evaluating
preemptive scheduling policies and task migration in a
high level simulator, by combining the new ability to
model interrupts with dramatically increased efficiency
in the high-level modeling of scheduling and
communication MESH already provides.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Heterogeneous chip multiprocessors; MESH; scenario
oriented design",
}
@Article{Ogras:2008:AOP,
author = "Umit Y. Ogras and Radu Marculescu",
title = "Analysis and optimization of prediction-based flow
control in networks-on-chip",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "11:1--11:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297677",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Networks-on-Chip (NoC) communication architectures
have emerged recently as a scalable solution to on-chip
communication problems. While the NoC architectures may
offer higher bandwidth compared to traditional
bus-based communication, their performance can degrade
significantly in the absence of effective flow control
algorithms. Unfortunately, flow control algorithms
developed for macronetworks, either rely on local
information, or suffer from large communication
overhead and unpredictable delays. Hence, using them in
the NoC context is problematic at best. For this
reason, we propose a predictive closed-loop flow
control mechanism and make the following contributions:
First, we develop traffic source and router models
specifically targeted to NoCs. Then, we utilize these
models to predict the possible congestion in the
network. Based on this information, the proposed scheme
controls the packet injection rate at traffic sources
in order to regulate the total number of packets in the
network. We also illustrate the proposed traffic source
model and the applicability of the proposed flow
controller to actual designs using real NoC
implementations. Finally, simulations and experimental
study using our FPGA prototype show that the proposed
controller delivers a better performance compared to
the traditional switch-to-switch flow control
algorithms under various real and synthetic traffic
patterns.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "congestion control; flow control; Multi-processor
systems; networks-on-chip",
}
@Article{Chang:2008:TCS,
author = "Kuei-Chung Chang and Jih-Sheng Shen and Tien-Fu Chen",
title = "Tailoring circuit-switched network-on-chip to
application-specific system-on-chip by two optimization
schemes",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "12:1--12:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297678",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As the number of cores on a chip increases, power
consumed by the communication structures takes a
significant portion of the overall power budget. In
this article, we first propose a circuit-switched
interconnection architecture which uses crossroad
switches to construct dedicated channels dynamically
between any pairs of cores for nonhuge
application-specific SoCs. The structure of the
crossroad switch is simple, which can be regarded as a
NoC-lite router, and we can easily construct a
low-power on-chip network with these switches by a
system-level design methodology. We also present the
design methodology to tailor the proposed
interconnection architecture to low-power structures by
two proposed optimization schemes with profiled
communication characteristics. The first scheme is
power-aware topology construction, which can build
low-power application-specific interconnection
topologies. To further reduce the power consumption, we
propose the second optimization scheme to predetermine
the operating mode of dual-mode switches in the NoC at
runtime. We evaluate several interconnection
techniques, and the results show that the proposed
architecture is more low-power and high-performance
than others under some constraints and scale
boundaries. We take multimedia applications as case
studies, and experimental results show the power
savings of power-aware topology approximate to 49\% of
the interconnection architecture. The power consumption
can be further reduced approximately 25\% by applying
partially dedicated path mechanism.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Application specific; interconnection; low power;
networks on chip; systems on chips",
}
@Article{Abbasian:2008:WBD,
author = "A. Abbasian and S. Hatami and A. Afzali-Kusha and M.
Pedram",
title = "Wavelet-based dynamic power management for
nonstationary service requests",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "13:1--13:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297679",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, a wavelet-based dynamic power
management policy (WBDPM) is proposed. In this
approach, the workload source (service requester) is
modeled by a nonstationary time series which is, in turn,
represented by a nondecimated Haar wavelet as its
basis. The proposed approach is robust and has the
ability to minimize energy dissipation under different
performance constraints. To assess the accuracy of the
model, the algorithm was implemented for data extracted
from the hard disks of computers. Prediction results of
this approach for the case of a nonstationary service
requester exhibit accuracies of more than 95\%.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Dynamic power management; low-power system design;
nonstationary service request; wavelet-based
prediction",
}
@Article{Su:2008:SNT,
author = "Yu-Shih Su and Po-Hsien Chang and Shih-Chieh Chang and
Tingting Hwang",
title = "Synthesis of a novel timing-error detection
architecture",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "14:1--14:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297680",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Delay variation can cause a design to fail its timing
specification. Ernst et al. [2003] observe that the
worst delay of a design is least probable to occur.
They propose a mechanism to detect and correct
occasional errors while the design can be optimized for
the common cases. Their experimental results show
significant performance (or power) gain as compared
with the worst-case design. However, the architecture
in Ernst et al. [2003] suffers the short path problem,
which is difficult to resolve. In this article, we
propose a novel error-detecting architecture to solve
the short path problem. Our experimental results show
considerable performance gain can be achieved with
reasonable area overhead.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "fault tolerance; Logic synthesis",
}
@Article{Raabe:2008:RDS,
author = "Andreas Raabe and Philipp A. Hartmann and Joachim K.
Anlauf",
title = "{ReChannel}: {Describing} and simulating
reconfigurable hardware in {SystemC}",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "15:1--15:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297681",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the ongoing integration of (dynamic)
reconfiguration into current system models, new
methodologies and tools are needed to help the designer
during the development process. This article introduces
a language extension for SystemC along with a design
methodology for describing and simulating dynamically
reconfigurable systems at all levels of abstraction.
The presented library provides maximum freedom of
description of reconfiguration behavior and its
control, while featuring simulation of runtime
configuration, removal, and exchange of custom modules
as well as third-party IP-cores during the complete
architecture refinement process. When designing at
RT-level, the resulting hardware description can easily
be synthesized by standard synthesis tools.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "dynamic reconfiguration; hardware description;
Reconfigurable hardware; refinement; simulation;
SystemC",
}
@Article{Zhou:2008:AAS,
author = "Xiangrong Zhou and Chenjie Yu and Alokika Dash and
Peter Petrov",
title = "Application-aware snoop filtering for low-power cache
coherence in embedded multiprocessors",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "16:1--16:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297682",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Maintaining local caches coherently in shared-memory
multiprocessors results in significant power
consumption. The customization methodology we propose
exploits the fact that in embedded systems, important
knowledge is available to the system designers
regarding memory sharing between tasks. We demonstrate
how the snoop-induced cache probings can be
significantly reduced by identifying and exploiting in
a deterministic way the shared memory regions between
the processors. Snoop activity is enabled only for the
accesses referring to known shared regions. The
hardware support is not only cost efficient, but also
software programmable, which allows for
reprogrammability and customization across different
tasks and applications.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Cache coherence; embedded multiprocessors; low-power
embedded systems; snoop filtering",
}
@Article{Ahn:2008:SSC,
author = "Yongjin Ahn and Keesung Han and Ganghee Lee and
Hyunjik Song and Junhee Yoo and Kiyoung Choi and
Xingguang Feng",
title = "{SoCDAL}: {System-on-chip design AcceLerator}",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "17:1--17:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297683",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Time-to-market pressure and the ever-growing design
complexity of multiprocessor system-on-chips have
demanded an efficient design environment that enables
fast exploration of large design space. In this
article, we introduce a new design environment, called
SoCDAL, for accelerating multiprocessor system-on-chip
design through fast design-space exploration targeting
real-time multimedia systems. SoCDAL is a set of mostly
automated tools covering system specification,
hardware/software estimation,
application-to-architecture mapping, simulation model
generation, and system verification through simulation.
For system specification, the process network model has
been widely used for system specification because of
its modeling capability. However, it is hard to use for
real-time systems design, since its behavior cannot be
estimated statically. We introduce a new approach which
enables analyzing a process network model statically
with some restrictions. For the hardware/software
estimation, we analyze codes statically.
Application-to-architecture mapping process implements
a novel algorithm to support an arbitrary number of
processors, with performance evaluation by static
scheduling considering communication behavior. Mapping
results are used to generate simulation models
automatically at several transaction levels to be
pipelined to a commercial tool. We show the
effectiveness of our approaches by some experimental
results with multimedia applications such as JPEG,
H.263, and H.264 encoders, as well as an H.264
decoder.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "application-to-architecture mapping; Codesign;
design-space exploration; multiprocessor
system-on-chip; process networks; scheduling;
simulation; specification; static hardware/software
estimation; synchronous dataflow; transaction-level
model; worst-case execution time",
}
@Article{Zamora:2008:EMU,
author = "Nicholas H. Zamora and Xiaoping Hu and Umit Y. Ogras
and Radu Marculescu",
title = "Enabling multimedia using resource-constrained video
processing techniques: a node-centric perspective",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "18:1--18:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297684",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Successful proliferation of multimedia-enabled devices
and advances in very large-scale integration (VLSI)
technology have spawned new research efforts in
migrating video processing applications onto ever
smaller and more inexpensive devices. This article
focuses on the technical challenges associated with
that migration. \par
Due to limitations in size, battery lifetime, and,
ultimately, cost, mapping complex video applications
onto resource-constrained systems is a very challenging
proposition. To this end, we first consider a
technique, region-of-interest (ROI) processing, of
defining a window within a video frame and only
operating on the data inside that window, ignoring the
rest of the frame. By using this lossy technique, the
processing requirements can be reduced by roughly 80\%
while the error introduced in the quality of the
results is roughly 10\%. The other technique is
adaptive data partitioning (ADP) combined with a
content-based power management algorithm. By
distributing video processing among multiple processors
and shutting them down when they are not needed, the
energy consumed per processor can be reduced by 60\%
without sacrificing the performance of the underlying
video-based application. \par
Taken together, these novel techniques enable ambient
multimedia systems and maintain the needed overall
efficiency in video processing.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "data partitioning; lossy and lossless video
processing; real-time video processing;
Region-of-interest (ROI)",
}
@Article{Lee:2008:FCB,
author = "Kyungsoo Lee and Naehyuck Chang and Jianli Zhuo and
Chaitali Chakrabarti and Sudheendra Kadri and Sarma
Vrudhula",
title = "A fuel-cell-battery hybrid for portable embedded
systems",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "19:1--19:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297685",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents our work on the development of a
fuel cell (FC) and battery hybrid (FC-Bh) system for
use in portable microelectronic systems. We describe
the design and control of the hybrid system, as well as
a dynamic power management (DPM)-based energy
management policy that extends its operational
lifetime. The FC is of the proton exchange membrane
(PEM) type, operates at room temperature, and has an
energy density which is 4--6 times that of a Li-ion
battery. The FC cannot respond to sudden changes in the
load, and so a system powered solely by the FC is not
economical. An FC-Bh power source, on the other hand,
can provide the high energy density of the FC and the
high power density of a battery. \par
In this work we first describe the prototype FC-Bh
system that we have built. Such a prototype helps to
characterize the performance of a hybrid power source,
and also helps explore new energy management strategies
for embedded systems powered by hybrid sources. Next we
describe a Matlab/Simulink-based FC-Bh system simulator
which serves as an alternate experimental platform and
that enables quick evaluation of system-level control
policies. Finally, we present an optimization framework
that explicitly considers the characteristics of the
FC-Bh system and is aimed at minimizing the fuel
consumption. This optimization framework is applied on
top of a prediction-based DPM policy and is used to
derive a new fuel-efficient DPM scheme. The proposed
scheme demonstrates up to 32\% system lifetime
extension compared to a competing scheme when run on a
real trace-based MPEG encoding example.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "battery; DPM; fuel cell; hybrid systems; Simulation;
simulator",
}
@Article{Chao:2008:LPG,
author = "Wei-Chung Chao and Wai-Kei Mak",
title = "Low-power gated and buffered clock network
construction",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "20:1--20:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297686",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We propose an efficient algorithm to construct a
low-power zero-skew gated clock network, given the
module locations and activity information. Unlike
previous works, we consider masking logic insertion and
buffer insertion simultaneously, and guarantee to yield
a zero-skew clock tree. Both the logical and physical
information of the modules are carefully taken into
consideration when determining where masking logic
should be inserted. We also account for the power
overhead of the control signals so that the total
average power consumption of the constructed zero-skew
gated clock network can be minimized. To this end, we
present a recursive approach to compute the effective
switched capacitance of a general gated and buffered
clock network, accounting for both the clock tree's and
controller tree's switched capacitance. The power
consumptions of the gated clock networks constructed by
our algorithm are 20 to 36\% lower than those reported
in the best previous work in the literature.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "buffer; clock gating; Clock tree; low power;
zero-skew",
}
@Article{Sham:2008:OWR,
author = "Chiu-Wing Sham and Evangeline F. Y. Young and Hai
Zhou",
title = "Optimizing wirelength and routability by searching
alternative packings in floorplanning",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "21:1--21:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297687",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Recent advances in VLSI technology have made
optimization of the interconnect delay and routability
of a circuit more important. We should consider
interconnect planning as early as possible. We propose
a postfloorplanning step to reduce the interconnect
cost of a floorplan by searching alternative packings.
If a packing contains a rectangular bounding box of a
group of modules, we can rearrange the blocks in the
bounding box to obtain a new floorplan with the same
area, but possibly with a smaller interconnect cost.
Experimental results show that we can reduce the
interconnect cost of a packing without any penalty in
area.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Floorplanning; wirelength reduction",
}
@Article{Wu:2008:CPR,
author = "Meng-Chiou Wu and Rung-Bin Lin and Shih-Cheng Tsai",
title = "Chip placement in a reticle for multiple-project wafer
fabrication",
journal = j-TODAES,
volume = "13",
number = "1",
pages = "22:1--22:??",
month = jan,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1297666.1297688",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:00 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Chip placement in a reticle is crucial to the cost of
a multiproject wafer run. In this article we develop
several chip placement methods based on the
volume-driven compatibility optimization (VOCO)
concept, which maximizes dicing compatibility among
chips with large-volume requirements while minimizing
reticle dimensions. Our mixed-integer linear
programming models with VOCO are too complex to render
good solutions for large test cases. Our B*-tree with
VOCO and HQ with VOCO use $ 16 \% \sim 29 \% $ fewer
wafers and $ 8 \% \sim 19 \% $ less reticle area than
the hierarchical quadrisection (HQ) method proposed by
Kahng et al. [2005].",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "compatibility graph; conflict graph; mixed-integer
linear programming (MILP); Multiple-project wafers
(MPW); reticle floorplanning; set cover; set partition;
shuttle mask; simulated annealing (SA); wafer dicing",
}
@Article{Dutt:2008:Eb,
author = "Nikil Dutt",
title = "Editorial",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "23:1--23:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344419",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Saluja:2008:SBA,
author = "Nikhil Saluja and Kanupriya Gulati and Sunil P.
Khatri",
title = "{SAT}-based {ATPG} using multilevel compatible
don't-cares",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "24:1--24:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344420",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In a typical IC design flow, circuits are optimized
using multilevel don't cares. The computed don't cares
are discarded before Technology Mapping or Automatic
Test Pattern Generation (ATPG). In this paper, we
present two combinational ATPG algorithms for
combinational designs. These algorithms utilize the
multilevel don't cares that are computed for the design
during technology independent logic optimization. They
are based on Boolean Satisfiability (SAT), and utilize
the single stuck-at fault model. Both algorithms make
use of the Compatible Observability Don't Cares (CODCs)
associated with nodes of the circuit, to speed up the
ATPG process. For large circuits, both algorithms make
use of approximate CODCs (ACODCs), which we can compute
efficiently. Our first technique speeds up fault
propagation by modifying the active clauses in the
transitive fanout (TFO) of the fault site. In our
second technique, we define new j-active variables
for specific nodes in the transitive fanin (TFI) of the
fault site. Using these j-active variables we write
additional clauses to speed up fault justification.
Experimental results demonstrate that the combination
of these techniques (when using CODCs) results in an
average reduction of 45\% in ATPG runtimes. When ACODCs
are used, a speed-up of about 30\% is obtained in the
ATPG run-times for large designs. We compare our method
against a commercial structural ATPG tool as well. Our
method is slower for small designs, but for large
designs, we obtain a 31\% average speedup over the
commercial tool.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Automatic test pattern generation (ATPG); Boolean
satisfiability (SAT); don't cares; testing",
}
@Article{Muchherla:2008:NEW,
author = "Kishore Kumar Muchherla and Pinhong Chen and Dongsheng
Ma and Janet Meiling Wang",
title = "A noniterative equivalent waveform model for timing
analysis in presence of crosstalk",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "25:1--25:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344421",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Due to the nonuniform interconnect scaling in the Deep
Sub Micron (DSM) region, the coupling capacitance
between wires becomes an increasingly dominant fraction
of the total wire capacitance. This coupling capacitance
introduces severe crosstalk which causes delay
variations on signal lines and raises signal integrity
problems. Therefore, including crosstalk in the timing
analysis methods has become imperative for current
technologies. And to correctly model the crosstalk,
output loading effects, waveform shape and gate driving
capability have to be considered. However, most
existing crosstalk models have not yet included these
factors and consequently suffer from the low accuracy
problem. In this article, we propose a noniterative
equivalent waveform model that addresses the above
mentioned issues. Our experimental results have shown
that the new model achieves 3 times speed up and 95\%
accuracy compared to the existing models.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Deep sub micron; delay; equivalent waveform; noise;
timing analysis",
}
@Article{Yan:2008:TDO,
author = "Jin-Tai Yan",
title = "Timing-driven octilinear {Steiner} tree construction
based on {Steiner-point} reassignment and path
reconstruction",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "26:1--26:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344422",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "It is well known that the problem of constructing a
timing-driven rectilinear Steiner tree for any signal
net is important in performance-driven designs and has
been extensively studied. Until now, many efficient
approaches have been proposed for the construction of a
timing-driven rectilinear Steiner tree. As technology
process advances, $ + 45^\circ $ and $ - 45^\circ $
diagonal segments can be permitted in an octilinear
routing model. To our knowledge, no approach is
proposed to construct a timing-driven octilinear
Steiner tree for any signal net. In this paper, given a
rectilinear Steiner tree for any signal net, we propose
an efficient transformation-based approach to construct
a timing-driven octilinear Steiner tree based on the
computation of the octilinear distance and the concept
of Steiner-point reassignment and path reconstruction
in an octilinear routing model. The experimental
results show that our proposed transformation-based
approach can use reasonable CPU time to construct a
TOST, and a 10\%--18\% improvement in timing delay and
a 5\%--14\% improvement in total wire length in the
original RSTs are obtained in the construction of TOSTs
for the tested signal nets.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Elmore delay; Global routing; octilinear Steiner tree;
Steiner points",
}
@Article{Baldassin:2008:OSB,
author = "Alexandro Baldassin and Paulo Centoducatte and Sandro
Rigo and Daniel Casarotto and Luiz C. V. Santos and Max
Schultz and Olinto Furtado",
title = "An open-source binary utility generator",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "27:1--27:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344423",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Electronic system level (ESL) modeling allows early
hardware-dependent software (HDS) development. Due to
broad CPU diversity and shrinking time-to-market, HDS
development can neither rely on hand-retargeting binary
tools, nor can it rely on pre-existent tools within
standard packages. As a consequence, binary utilities
which can be easily adapted to new CPU targets are of
increasing interest. We present in this article a
framework for automatic generation of binary utilities.
It relies on two innovative ideas: platform-aware
modeling and more inclusive relocation handling.
Generated assemblers, linkers, disassemblers and
debuggers were validated for MIPS, SPARC, PowerPC,
i8051 and PIC16F84. An open-source prototype generator
is available for download.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Platform debugging; retargetable tools; TLM",
}
@Article{Moscola:2008:RCB,
author = "James Moscola and John W. Lockwood and Young H. Cho",
title = "Reconfigurable content-based router using
hardware-accelerated language parser",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "28:1--28:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344424",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents a dense logic design for
matching multiple regular expressions with a field
programmable gate array (FPGA) at 10+ Gbps. It
leverages on the design techniques that enforce the
shortest critical path on most FPGA architectures while
optimizing the circuit size. The architecture is
capable of supporting a maximum throughput of 12.90
Gbps on a Xilinx Virtex 4 LX200 and its performance is
linearly scalable with size. Additionally, this article
presents techniques for parsing data streams to provide
semantic information for patterns found within a data
stream. We illustrate how a content-based router can be
implemented with our parsing techniques using an XML
parser as an example. The content-based router
presented was designed, implemented, and tested in a
Xilinx Virtex XCV2000E FPGA on the FPX platform. It is
capable of processing 32-bits of data per clock cycle
and runs at 100 MHz. This allows the system to process
and route XML messages at 3.2 Gbps.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "content-based routing; parser hardware; Parsing;
pattern matching; regular expressions; XML",
}
@Article{Jones:2008:RFI,
author = "Alex K. Jones and Swapna Dontharaju and Shenchih Tung
and Leo Mats and Peter J. Hawrylak and Raymond R. Hoare
and James T. Cain and Marlin H. Mickle",
title = "Radio frequency identification prototyping",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "29:1--29:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344425",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "While RFID is starting to become a ubiquitous
technology, the variation between different RFID
systems still remains high. This paper presents several
prototyping environments for different components of
radio frequency identification (RFID) tags to
demonstrate how many of these components can be
standardized for many different purposes. We include
two active tag prototypes, one based on a
microprocessor and the second based on custom hardware.
To program these devices we present a design automation
flow that allows RFID transactions to be described in
terms of primitives with behavior written in ANSI C
code. To save power with active RFID devices we
describe a passive transceiver switch called the
``burst switch'' and demonstrate how this can be used
in a system with a microprocessor or custom hardware
controller. Finally, we present a full RFID system
prototyping environment based on real-time spectrum
analysis technology currently deployed at the
University of Pittsburgh RFID Center of Excellence.
Using our prototyping techniques we show how
transactions from multiple standards can be combined
and targeted to several microprocessors including the
Microchip PIC, Intel StrongARM and XScale, and AD Chips
EISC as well as several hardware targets including the
Altera Apex, Actel Fusion, Xilinx Coolrunner II,
Spartan 3 and Virtex 2, and cell-based ASICs.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Design automation; low-power; prototyping; RFID",
}
@Article{Hu:2008:PSF,
author = "Yu Hu and Yan Lin and Lei He and Tim Tuan",
title = "Physical synthesis for {FPGA} interconnect power
reduction by dual-{Vdd} budgeting and retiming",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "30:1--30:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344426",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Field programmable dual-Vdd interconnects are
effective in reducing FPGA power. We formulate the
dual-Vdd-aware slack budgeting problem as a linear
program (LP) and a min-cost network flow problem,
respectively. Both algorithms reduce interconnect power
by 50\% on average compared to single-Vdd
interconnects, but the network-flow-based algorithm
runs 11x faster on MCNC benchmarks. Furthermore, we
develop simultaneous retiming and slack budgeting
(SRSB) with flip-flop layout constraints in dual-Vdd
FPGAs based on mixed integer linear programming, and
speed-up the algorithm by LP relaxation and local
legalization. Compared to retiming followed by slack
budgeting, SRSB reduces interconnect power by up to
28.8\%.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "FPGA; Low power; retiming",
}
@Article{AlKhatib:2008:MSC,
author = "Iyad {Al Khatib} and Francesco Poletti and Davide
Bertozzi and Luca Benini and Mohamed Bechara and Hasan
Khalifeh and Axel Jantsch and Rustam Nabiev",
title = "A multiprocessor system-on-chip for real-time
biomedical monitoring and analysis: {ECG} prototype
architectural design space exploration",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "31:1--31:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344427",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article we focus on multiprocessor
system-on-chip (MPSoC) architectures for human heart
electrocardiogram (ECG) real time analysis as a
hardware/software (HW/SW) platform offering an advance
relative to state-of-the-art solutions. This is a
relevant biomedical application with good potential
market, since heart diseases are responsible for the
largest number of yearly deaths. Hence, it is a good
target for an application-specific system-on-chip (SoC)
and HW/SW codesign. We investigate a symmetric
multiprocessor architecture based on STMicroelectronics
VLIW DSPs that process in real time 12-lead ECG
signals. This architecture improves upon
state-of-the-art SoC designs for ECG analysis in its
ability to analyze the full 12 leads in real time, even
with high sampling frequencies, and its ability to
detect heart malfunction for the whole ECG signal
interval. We explore the design space by considering a
number of hardware and software architectural options.
Comparing our design with present-day solutions from an
SoC and application point-of-view shows that our
platform can be used in real time and without
failures.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "electrocardiogram algorithms; embedded system design;
hardware space exploration; Multiprocessor
system-on-chip; real time analysis",
}
@Article{Zhou:2008:HTC,
author = "Xiangrong Zhou and Peter Petrov",
title = "Heterogeneously tagged caches for low-power embedded
systems with virtual memory support",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "32:1--32:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344428",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "An energy-efficient data cache organization for
embedded processors with virtual memory is proposed.
Application knowledge regarding memory references is
used to eliminate most tag translations. A novel
tagging scheme is introduced, where both virtual and
physical tags coexist. Physical tags and special
handling of superset index bits are only used for
references to shared regions in order to avoid cache
inconsistency. By eliminating the need for most address
translations on cache access, a significant power
reduction is achieved. We outline an efficient hardware
architecture, where the application information is
captured in a reprogrammable way and the cache is
minimally modified.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Embedded systems",
}
@Article{Liu:2008:PVA,
author = "Fang Liu and Sule Ozev and Plamen K. Nikolov",
title = "Parametric variability analysis for multistage analog
circuits using analytical sensitivity modeling",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "33:1--33:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344429",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Process variations play an increasingly important role
on the success of analog circuits. State-of-the-art
analog circuits are based on complex architectures and
contain many hierarchical layers and parameters.
Knowledge of the parameter variances and their
contribution patterns is crucial for a successful
design process. This information is valuable to find
solutions for many problems in design, design
automation, testing, and fault tolerance. In this
article, we present a hierarchical variance analysis
methodology for multistage analog circuits. Starting
from the process/layout level, we derive implicit
hierarchical relations and extract the sensitivity
information analytically. We make use of previously
computed values whenever possible so as to reduce
computational time. The proposed approach is
particularly geared for the domain of design and test
automation, where multiple runs on slightly different
circuits are necessary. Experimental results indicate
that the proposed method provides both accuracy and
computational efficiency when compared with prior
approaches.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "analog circuits; Hierarchical variance analysis;
parameter correlations; performance model; process
variations",
}
@Article{Cheng:2008:FSI,
author = "Lei Cheng and Deming Chen and Martin D. F. Wong",
title = "A fast simultaneous input vector generation and gate
replacement algorithm for leakage power reduction",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "34:1--34:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344430",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The Input vector control (IVC) technique is based on
the observation that the leakage current in a CMOS
logic gate depends on gate input state, and a good
input vector is able to minimize leakage when the
circuit is in sleep mode. The gate replacement
technique is a very effective method to further reduce
the leakage current. In this article, we propose a fast
heuristic algorithm to find a low-leakage input vector
with simultaneous gate replacement. Results on MCNC91
benchmark circuits show that our algorithm produces
14\% better leakage current reduction with several
orders of magnitude speedup in runtime for large
circuits compared to the previous state-of-the-art
algorithm. In particular, the average runtime for the
ten largest combinational circuits has been
dramatically reduced from 1879 seconds to 0.34
seconds.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "gate replacement; Input vector control; leakage
reduction",
}
@Article{Bernasconi:2008:OKS,
author = "Anna Bernasconi and Valentina Ciriani and Roberto
Cordone",
title = "The optimization of {kEP-SOPs}: {Computational}
complexity, approximability and experiments",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "35:1--35:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344431",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We propose a new algebraic four-level expression
called k-EXOR-projected sum of products (kEP-SOP). The
optimization of a kEP-SOP is NP$^{\rm NP}$-hard, but can be
approximated within a fixed performance guarantee in
polynomial time. Moreover, fully testable circuits
under the stuck-at-fault model can be derived from
kEP-SOPs by adding at most a constant number of
multiplexer gates. The experiments show that the
computational time is very short and the results are
most of the time optimal with respect to the number of
products involved. kEP-SOPs also prove experimentally a
good starting point for general multilevel logic
synthesis.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "approximation algorithm; Automatic synthesis;
multilevel logic synthesis; optimization; testing",
}
@Article{Bahar:2008:IJA,
author = "R. Iris Bahar and Krishnendu Chakrabarty",
title = "Introduction to joint {ACM JETC\slash TODAES} special
issue on new, emerging, and specialized technologies",
journal = j-TODAES,
volume = "13",
number = "2",
pages = "36:1--36:??",
month = apr,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1344418.1344432",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jun 12 18:10:39 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dutt:2008:E,
author = "Nikil Dutt",
title = "Editorial",
journal = j-TODAES,
volume = "13",
number = "3",
pages = "37:1--37:??",
month = jul,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1367045.1367046",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Aug 5 18:41:27 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jones:2008:ISS,
author = "Alex K. Jones and Robert Walker",
title = "Introduction to the special section on demonstrable
software systems and hardware platforms {II}",
journal = j-TODAES,
volume = "13",
number = "3",
pages = "38:1--38:??",
month = jul,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1367045.1367047",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Aug 5 18:41:27 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kwon:2008:RPP,
author = "Seongnam Kwon and Yongjoo Kim and Woo-Chul Jeun and
Soonhoi Ha and Yunheung Paek",
title = "A retargetable parallel-programming framework for
{MPSoC}",
journal = j-TODAES,
volume = "13",
number = "3",
pages = "39:1--39:??",
month = jul,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1367045.1367048",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Aug 5 18:41:27 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As more processing elements are integrated in a single
chip, embedded software design becomes more
challenging: It becomes a parallel programming for
nontrivial heterogeneous multiprocessors with diverse
communication architectures, and design constraints
such as hardware cost, power, and timeliness. In the
current practice of parallel programming with MPI or
OpenMP, the programmer should manually optimize the
parallel code for each target architecture and for the
design constraints. Thus, the design-space exploration
of MPSoC (multiprocessor systems-on-chip) costs become
prohibitively large as software development overhead
increases drastically. To solve this problem, we
develop a parallel-programming framework based on a
novel programming model called common intermediate code
(CIC). In a CIC, functional parallelism and data
parallelism of application tasks are specified
independently of the target architecture and design
constraints. Then, the CIC translator translates the
CIC into the final parallel code, considering the
target architecture and design constraints to make the
CIC retargetable. Experiments with preliminary
examples, including the H.263 decoder, show that the
proposed parallel-programming framework increases the
design productivity of MPSoC software significantly.",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design-space exploration; embedded software;
multiprocessor system on chip; parallel-programming;
software generation",
}
@Article{Kumar:2008:MSS,
author = "Akash Kumar and Shakith Fernando and Yajun Ha and Bart
Mesman and Henk Corporaal",
title = "Multiprocessor systems synthesis for multiple
use-cases of multiple applications on {FPGA}",
journal = j-TODAES,
volume = "13",
number = "3",
pages = "40:1--40:??",
month = jul,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1367045.1367049",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Aug 5 18:41:27 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Future applications for embedded systems demand chip
multiprocessor designs to meet real-time deadlines. The
large number of applications in these systems generates
an exponential number of use-cases. The key design
automation challenges are designing systems for these
use-cases and fast exploration of software and hardware
implementation alternatives with accurate performance
evaluation of these use-cases. These challenges cannot
be overcome by current design methodologies which are
semiautomated, time consuming, and error prone.\par
In this article, we present a design methodology to
generate multiprocessor systems in a systematic and
fully automated way for {\em multiple use-cases}.
Techniques are presented to merge multiple use-cases
into one hardware design to minimize cost and design
time, making it well suited for fast design-space
exploration (DSE) in MPSoC systems. Heuristics to
partition use-cases are also presented such that each
partition can fit in an FPGA, and all use-cases can be
catered for.\par
The proposed methodology is implemented into a tool for
Xilinx FPGAs for evaluation. The tool is also made
available online for the benefit of the research
community and is used to carry out a DSE case study
with multiple use-cases of real-life applications: H263
and JPEG decoders. The generation of the entire design
takes about 100 ms, and the whole DSE was completed in
45 minutes, including FPGA mapping and synthesis. The
heuristics used for use-case partitioning reduce the
design-exploration time elevenfold in a case study with
mobile-phone applications.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design exploration; FPGA; multi-application;
multimedia systems; multiple use-cases; multiprocessor
systems; synchronous data-flow graphs",
}
@Article{Krashinsky:2008:ISV,
author = "Ronny Krashinsky and Christopher Batten and Krste
Asanovi{\'c}",
title = "Implementing the {Scale} vector-thread processor",
journal = j-TODAES,
volume = "13",
number = "3",
pages = "41:1--41:??",
month = jul,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1367045.1367050",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Aug 5 18:41:27 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The Scale vector-thread processor is a
complexity-effective solution for embedded computing
which flexibly supports both vector and highly
multithreaded processing. The 7.1-million transistor
chip has 16 decoupled execution clusters, vector load
and store units, and a nonblocking 32KB cache. An
automated and iterative design and verification flow
enabled a performance-, power-, and area-efficient
implementation with two person-years of development
effort. Scale has a core area of 16.6 mm$^2$ in 180 nm
technology, and it consumes 400 mW--1.1 W while running
at 260 MHz.",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "hybrid C++/Verilog simulation; iterative VLSI design
flow; multithreaded processors; procedural datapath
pre-placement; vector processors; vector-thread
processors",
}
@Article{Mishra:2008:SDD,
author = "Prabhat Mishra and Nikil Dutt",
title = "Specification-driven directed test generation for
validation of pipelined processors",
journal = j-TODAES,
volume = "13",
number = "3",
pages = "42:1--42:??",
month = jul,
year = "2008",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1367045.1367051",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Aug 5 18:41:27 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Functional validation is a major bottleneck in
pipelined processor design due to the combined effects
of increasing design complexity and lack of efficient
techniques for directed test generation. Directed test
vectors can reduce overall validation effort, since
shorter tests can obtain the same coverage goal
compared to the random tests. This article presents a
specification-driven directed test generation
methodology. The proposed methodology makes three
important contributions. First, a general graph model
is developed that can capture the structure and
behavior (instruction set) of a wide variety of
pipelined processors. The graph model is generated from
the processor specification. Next, we propose a
functional fault model that is used to define the
functional coverage for pipelined architectures.
Finally, we propose two complementary test generation
techniques: test generation using model checking, and
test generation using template-based procedures. These
test generation techniques accept the graph model of
the architecture as input and generate test programs to
detect all the faults in the functional fault model.
Our experimental results on two pipelined processor
models demonstrate several orders-of-magnitude
reduction in overall validation effort by drastically
reducing both test-generation time and number of test
programs required to achieve a coverage goal.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "functional validation; model checking; test
generation",
}
@Article{Joo:2008:ECP,
  author =       "Yongsoo Joo and Youngjin Cho and Donghwa Shin and
                 Jaehyun Park and Naehyuck Chang",
  title =        "An energy characterization platform for memory devices
                 and energy-aware data compression for multilevel-cell
                 flash memory",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "43:1--43:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367052",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Memory devices often consume more energy than
                 microprocessors in current portable embedded systems,
                 but their energy consumption changes significantly with
                 the type of transaction, data values, and access
                 timing, as well as depending on the total number of
                 transactions. These variabilities mean that an
                 innovative tool and framework are required to
                 characterize modern memory devices running in embedded
                 system architectures.\par
                 We introduce an energy measurement and characterization
                 platform for memory devices, and demonstrate an
                 application to multilevel-cell (MLC) flash memories, in
                 which we discover significant value-dependent
                 programming energy variations. We introduce an
                 energy-aware data compression method that minimizes the
                 flash programming energy, rather than the size of the
                 compressed data, which is formulated as an entropy
                 coding with unequal bit-pattern costs. Deploying a
                 probabilistic approach, we derive energy-optimal
                 bit-pattern probabilities and expected values of the
                 bit-pattern costs which are applicable to the large
                 amounts of compressed data typically found in
                 multimedia applications. Then we develop an
                 energy-optimal prefix coding that uses integer linear
                 programming, and construct a prefix-code table. From a
                 consideration of Pareto-optimal energy consumption, we
                 can make tradeoffs between data size and programming
                 energy, such as a 41\% energy savings for a 52\% area
                 overhead.",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compression; flash memory; MLC",
}
@Article{Huffmire:2008:DSS,
  author =       "Ted Huffmire and Brett Brotherton and Nick Callegari
                 and Jonathan Valamehr and Jeff White and Ryan Kastner
                 and Tim Sherwood",
  title =        "Designing secure systems on reconfigurable hardware",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367053",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The extremely high cost of custom ASIC fabrication
                 makes FPGAs an attractive alternative for deployment of
                 custom hardware. Embedded systems based on
                 reconfigurable hardware integrate many functions onto a
                 single device. Since embedded designers often have no
                 choice but to use soft IP cores obtained from third
                 parties, the cores operate at different trust levels,
                 resulting in mixed-trust designs. The goal of this
                 project is to evaluate recently proposed security
                 primitives for reconfigurable hardware by building a
                 real embedded system with several cores on a single
                 FPGA and implementing these primitives on the system.
                 Overcoming the practical problems of integrating
                 multiple cores together with security mechanisms will
                 help us to develop realistic security-policy
                 specifications that drive enforcement mechanisms on
                 embedded systems.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Advanced Encryption Standard (AES); controlled
                 sharing; enforcement mechanisms; execution monitors;
                 Field programmable gate arrays (FPGAs); hardware
                 security; isolation; memory protection; reference
                 monitors; security policies; security primitives;
                 separation; static analysis; systems-on-a-chip (SoCs)",
}
@Article{Manolios:2008:AVS,
  author =       "Panagiotis Manolios and Sudarshan K. Srinivasan",
  title =        "Automatic verification of safety and liveness for
                 pipelined machines using {WEB} refinement",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "45:1--45:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367054",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We show how to automatically verify that complex
                 pipelined machine models satisfy the same safety and
                 liveness properties as their instruction-set
                 architecture (ISA) models by using well-founded
                 equivalence bisimulation (WEB) refinement. We show how
                 to reduce WEB-refinement proof obligations to formulas
                 expressible in the decidable logic of counter
                 arithmetic with lambda expressions and uninterpreted
                 functions (CLU). This allows us to automate the
                 verification of the pipelined machine models by using
                 the UCLID decision procedure to transform CLU formulas
                 to Boolean satisfiability problems. To relate pipelined
                 machine states to ISA states, we use the commitment and
                 flushing refinement maps. We evaluate our work using 17
                 pipelined machine models that contain various features,
                 including deep pipelines, precise exceptions, branch
                 prediction, interrupts, and instruction queues. Our
                 experimental results show that the overhead of proving
                 liveness, obtained by comparing the cost of proving
                 both safety and liveness with the cost of only proving
                 safety, is about 17\%, but depends on the refinement
                 map used; for example, the liveness overhead is 23\%
                 when flushing is used and is negligible when commitment
                 is used.",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "bisimulation; commitment; flushing; liveness;
                 pipelined machines; refinement; refinement maps; SAT;
                 verification",
}
@Article{Wu:2008:PVA,
  author =       "Huaizhi Wu and Martin D. F. Wong and Wilsin Gosti",
  title =        "Postplacement voltage assignment under performance
                 constraints",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "46:1--46:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367055",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multi-Vdd is an effective method to reduce both
                 leakage and dynamic power. A key challenge in a
                 multi-Vdd design is to control the complexity of the
                 power-supply system and limit the demand for level
                 shifters. This can be tackled by grouping cells of
                 different supply voltages into a small number of
                 voltage islands. Recently, an elegant algorithm was
                 proposed for generating voltage islands that balance
                 the power-versus-design-cost tradeoff under performance
                 requirement, according to the placement proximity of
                 the critical cells. One prerequisite of this algorithm
                 is an initial voltage assignment at the standard-cell
                 level that meets timing. In this article, we present a
                 novel method to produce quality voltage assignment
                 which not only meets timing but also forms good
                 proximity of the critical cells to provide a smooth
                 input to the aforementioned voltage island generation.
                 Our algorithm is based on effective delay budgeting and
                 efficient computation of physical proximity by Voronoi
                 diagram. Our extensive experiments on real industrial
                 designs show that our algorithm leads to 25\%--75\%
                 improvement in the voltage island generation in terms
                 of the number of voltage islands generated, with
                 computation time only linear to design size.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "low power; timing; voltage assignment; Voronoi
                 diagram",
}
@Article{Bombieri:2008:ROT,
  author =       "Nicola Bombieri and Franco Fummi and Graziano
                 Pravadelli",
  title =        "Reuse and optimization of testbenches and properties
                 in a {TLM-to-RTL} design flow",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "47:1--47:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367056",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In transaction-level modeling (TLM), verification
                 methodologies based on transactions allow testbenches,
                 properties, and IP cores in mixed TL-RTL designs to be
                 reused. However, no papers in the literature analyze
                 the effectiveness of transaction-based verification
                 (TBV) in comparison to the more traditional RTL
                 approach. The first contribution of this article is the
                 introduction of a functional-fault-model-based
                 methodology for demonstrating the effectiveness of
                 reuse through TBV. A second contribution is the
                 introduction of a similar methodology for efficient
                 property checking which identifies and removes
                 redundant properties prior to assertion-based
                 verification or model checking.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "fault models; functional verification; model checking;
                 TBV; TLM",
}
@Article{Inoue:2008:PVS,
  author =       "Hiroaki Inoue and Junji Sakai and Masato Edahiro",
  title =        "Processor virtualization for secure mobile terminals",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "48:1--48:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367057",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a processor virtualization architecture,
                 VIRTUS, to provide a dedicated domain for preinstalled
                 applications and virtualized domains for downloaded
                 native applications. With it, security-oriented
                 next-generation mobile terminals can provide any number
                 of domains for native applications. VIRTUS features
                 three new technologies, namely, VMM asymmetrization,
                 dynamic interdomain communication (IDC), and
                 virtualization-assist logic, and it is first in the
                 world to virtualize an ARM-based multiprocessor.
                 Evaluations have shown that VMM asymmetrization results
                 in significantly less performance degradation and LOC
                 increase than do other VMMs. Further, dynamic IDC
                 overhead is low enough, and virtualization-assist logic
                 can be implemented in a sufficiently small area.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "multiprocessor; processor virtualization",
}
@Article{Sanz:2008:CSS,
  author =       "Concepci{\'o}n Sanz and Manuel Prieto and Jos{\'e}
                 Ignacio G{\'o}mez and Antonis Papanikolaou and Miguel
                 Miranda and Francky Catthoor",
  title =        "Combining system scenarios and configurable memories
                 to tolerate unpredictability",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "49:1--49:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367058",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Process variability and the dynamism of new
                 applications increase the uncertainty of embedded
                 systems and force designers to use pessimistic
                 assumptions, which have a tremendous impact on both the
                 performance and energy consumption of their memory
                 organizations. In this article we introduce an
                 experimental framework which tries to mitigate the
                 effects of both sources of unpredictability. At compile
                 time, an extensive profiling helps us to detect system
                 scenarios and bounds application dynamism. At the
                 organization level, we incorporate a heterogeneous
                 memory architecture composed by several configurable
                 memories. A calibration process and a runtime control
                 system adapt the platform to the current application
                 needs. Our approach manages to reduce significantly the
                 energy overhead associated to both variability and
                 application dynamism (up to 60\%, according to our
                 simulations) without compromising the timing
                 constraints existing in our target domain of dynamic
                 periodic multimedia applications.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "parametric yield; process variation; variability
                 compensation",
}
@Article{Ozturk:2008:IBE,
  author =       "Ozcan Ozturk and Mahmut Kandemir",
  title =        "{ILP}-based energy minimization techniques for banked
                 memories",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "50:1--50:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367059",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Main memories can consume a significant portion of
                 overall energy in many data-intensive embedded
                 applications. One way of reducing this energy
                 consumption is banking, that is, dividing available
                 memory space into multiple banks and placing unused
                 (idle) memory banks into low-power operating modes.
                 Prior work investigated code-restructuring- and
                 data-layout-reorganization-based approaches for
                 increasing the energy benefits that could be obtained
                 from a banked memory architecture. This article
                 explores different techniques that can potentially
                 coexist within the same optimization framework for
                 maximizing benefits of low-power operating modes. These
                 techniques include employing nonuniform bank sizes,
                 data migration, data compression, and data replication.
                 By using these techniques, we try to increase the
                 chances for utilizing low-power operating modes in a
                 more effective manner, and achieve further energy
                 savings over what could be achieved by exploiting
                 low-power modes alone. Specifically, nonuniform banking
                 tries to match bank sizes with application-data access
                 patterns. The goal of data migration is to cluster data
                 with similar access patterns in the same set of banks.
                 Data compression reduces the size of the data used by
                 an application, and thus helps reduce the number of
                 memory banks occupied by data. Finally, data
                 replication increases bank idleness by duplicating
                 select read-only data blocks across banks. We formulate
                 each of these techniques as an ILP (integer linear
                 programming) problem, and solve them using a commercial
                 solver. Our experimental analysis using several
                 benchmarks indicates that all the techniques presented
                 in this framework are successful in reducing memory
                 energy consumption. Based on our experience with these
                 techniques, we recommend to compiler writers for banked
                 memories to consider data compression, replication, and
                 migration.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "compilers; data compression; DRAM; low-power operating
                 modes; memory banking; migration; replication",
}
@Article{Das:2008:RSA,
  author =       "Sabyasachi Das and Sunil P. Khatri",
  title =        "Resource sharing among mutually exclusive
                 sum-of-product blocks for area reduction",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "51:1--51:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367060",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In state-of-the-art digital designs, arithmetic blocks
                 consume a major portion of the total area of the IC.
                 The arithmetic sum-of-product (SOP) is the most widely
                 used arithmetic block. Some of the examples of SOP are
                 adder, subtractor, multiplier, multiply-accumulator
                 (MAC), squarer, chain-of-adders, incrementor,
                 decrementor, etc. In this article, we introduce a
                 novel, area-efficient architecture to share different
                 SOP blocks which are used in a mutually exclusive
                 manner. We implement the core functions of the largest
                 SOP only once and reuse different parts of the core
                 subblocks for all other SOP operations with the help of
                 multiplexers. This architecture can be used in the
                 nontiming-critical paths of the design, to save
                 significant amounts of area. Our experimental data
                 shows that the proposed sharing-based architecture
                 results in about 37\% area savings compared to the
                 results obtained from a commercially available
                 best-in-class datapath synthesis tool. In addition, our
                 proposed shared implementation consumes about 18\% less
                 power. These improvements were verified on
                 placed-and-routed designs as well.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Tseng:2008:PPD,
  author =       "I-Lun Tseng and Adam Postula",
  title =        "Partitioning parameterized 45-degree polygons with
                 constraint programming",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "52:1--52:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367061",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "An algorithm for partitioning parameterized 45-degree
                 polygons into parameterized trapezoids is proposed in
                 this article. The algorithm is based on the plane-sweep
                 technique and can handle polygons with complicated
                 constraints. The input to the algorithm consists of the
                 contour of a parameterized polygon to be partitioned
                 and a set of constraints for parameters of the contour.
                 The algorithm uses horizontal cuts only and generates a
                 number of nonoverlapping trapezoids whose union is the
                 original parameterized polygon. Processing of
                 constraints and coordinates that contain first-order
                 multiple-variable polynomials has been made possible by
                 incorporating the JaCoP constraint programming library.
                 The proposed algorithm has been implemented in Java
                 programming language and can be used as the basis to
                 build the trapezoidal corner stitching data structure
                 for parameterized VLSI layout masks.",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "analog and mixed-signal design; parameterized layouts;
                 parameterized polygons; polygon decomposition;
                 trapezoidal corner stitching",
}
@Article{Sehgal:2008:PAS,
  author =       "Anuja Sehgal and Sudarshan Bahukudumbi and Krishnendu
                 Chakrabarty",
  title =        "Power-aware {SoC} test planning for effective
                 utilization of port-scalable testers",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "53:1--53:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367062",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Many system-on-chip (SoC) integrated circuits contain
                 embedded cores with different scan frequencies. To
                 better meet the test requirements for such
                 heterogeneous SoCs, leading tester companies have
                 recently introduced port-scalable testers, which can
                 simultaneously drive groups of channels at different
                 data rates. However, the number of tester channels
                 available for scan testing is limited; therefore, a
                 higher shift frequency can increase the test time for a
                 core if the resulting test access architecture reduces
                 the bit-width used to access it. We present a scalable
                 test planning technique that exploits port scalability
                 of testers to reduce SoC test time. We compare the
                 proposed heuristic optimization method to two baseline
                 methods based on prior works that use a single scan
                 data rate for all embedded cores. We also propose a
                 power-aware test planning technique to effectively
                 utilize port-scalable testers under constraints of test
                 power consumption. Experimental results are presented
                 for power-aware test scheduling to illustrate the
                 impact of power constraints on overall test time.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "integer linear programming; port-scalable testers; SoC
                 test; test access architecture",
}
@Article{Pecenka:2008:ESR,
  author =       "Tomas Pecenka and Lukas Sekanina and Zdenek Kotasek",
  title =        "Evolution of synthetic {RTL} benchmark circuits with
                 predefined testability",
  journal =      j-TODAES,
  volume =       "13",
  number =       "3",
  pages =        "54:1--54:??",
  month =        jul,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1367045.1367063",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Aug 5 18:41:27 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents a new real-world application of
                 evolutionary computing in the area of digital-circuits
                 testing. A method is described which enables to evolve
                 large synthetic RTL benchmark circuits with a
                 predefined structure and testability. Using the
                 proposed method, a new collection of synthetic
                 benchmark circuits was developed. These benchmark
                 circuits will be useful in a validation process of
                 novel algorithms and tools in the area of
                 digital-circuits testing. Evolved benchmark circuits
                 currently represent the most complex benchmark circuits
                 with a known level of testability. Furthermore, these
                 circuits are the largest that have ever been designed
                 by means of evolutionary algorithms. This work also
                 investigates suitable parameters of the evolutionary
                 algorithm for this problem and explores the limits in
                 the complexity of evolved circuits.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "benchmark circuit; evolvable hardware; testability
                 analysis",
}
@Article{Pedram:2008:E,
  author =       "Massoud Pedram",
  title =        "Editorial",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "55:1--55:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1391962.1391963",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Guan:2008:SAP,
  author =       "Nan Guan and Qingxu Deng and Zonghua Gu and Wenyao Xu
                 and Ge Yu",
  title =        "Schedulability analysis of preemptive and
                 nonpreemptive {EDF} on partial runtime-reconfigurable
                 {FPGAs}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "56:1--56:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1391962.1391964",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Field Programmable Gate Arrays (FPGAs) are very
                 popular in today's embedded systems design, and Partial
                 Runtime-Reconfigurable (PRTR) FPGAs allow HW tasks to
                 be placed and removed dynamically at runtime. Hardware
                 task scheduling on PRTR FPGAs brings many challenging
                 issues to traditional real-time scheduling theory,
                 which have not been adequately addressed by the
                 research community compared to software task scheduling
                 on CPUs. In this article, we consider the
                 schedulability analysis problem of HW task scheduling
                 on PRTR FPGAs. We derive utilization bounds for several
                 variants of global preemptive/nonpreemptive EDF
                 scheduling, and compare the performance of different
                 utilization bound tests.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGA; Real-time scheduling; reconfigurable devices",
}
@Article{Mukherjee:2008:HLC,
  author =       "Rajarshi Mukherjee and Song Liu and Seda Ogrenci Memik
                 and Somsubhra Mondal",
  title =        "A high-level clustering algorithm targeting dual
                 {V$_{dd}$ FPGAs}",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "57:1--57:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1391962.1391965",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Recent advanced power optimizations deployed in
                 commercial FPGAs, laid out a roadmap towards FPGA
                 devices that can be integrated into ultra low power
                 systems. In this article, we present a high-level
                 design tool to support the process of mapping an
                 application onto a FPGA device with dual supply
                 voltages. Our main contribution in this paper is an
                 algorithm, which creates voltage scaling ready clusters
                 by utilizing the timing slack available in the designs.
                 We propose to first create clusters of CLBs within a
                 given CLB-level netlist. This clustering algorithm
                 intends to group chains of CLBs possessing similar
                 amounts of timing slack along their critical path
                 together. Once these clusters are identified, they are
                 placed onto respective V$_{dd}$ partitions on the
                 device. We have evaluated different dual V$_{dd}$
                 fabrics and the potential gain in power consumption is
                 explored. When a subset of the logic blocks on the
                 device can be driven by low V$_{dd}$ levels (either
                 with a dedicated low V$_{dd}$ supply or with a
                 programmable selection between low and high V$_{dd}$
                 levels for these blocks) this affects placement and
                 routing. As a result the maximum frequency of the
                 designs may be affected. In order to evaluate the
                 overall impact of creating voltage islands, we measured
                 the Energy-Delay Product for our benchmark designs. We
                 observed that the Energy-Delay product can be decreased
                 by 26.9\% when the placement of the designs into
                 different voltage levels is guided by our clustering
                 algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "clustering; Dynamic power; field programmable gate
                 arrays; partitioning; placement; voltage scaling",
}
@Article{Resano:2008:ESR,
  author =       "Javier Resano and Juan Antonio Clemente and Carlos
                 Gonzalez and Daniel Mozos and Francky Catthoor",
  title =        "Efficiently scheduling runtime reconfigurations",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "58:1--58:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1391962.1391966",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Due to the emergence of portable devices that must run
                 complex dynamic applications there is a need for
                 flexible platforms for embedded systems. Runtime
                 reconfigurable hardware can provide this flexibility
                 but the reconfiguration latency can significantly
                 decrease the performance. When dealing with task
                 graphs, runtime support that schedules the
                 reconfigurations in advance can drastically reduce this
                 overhead. However, executing complex scheduling
                 heuristics at runtime may generate an excessive
                 penalty. Hence, we have developed a hybrid
                 design-time/runtime reconfiguration scheduling
                 heuristic that generates its final schedule at runtime
                 but carries out most computations at design-time. We
                 have tested our approach in a PowerPC 405 processor
                 embedded on a FPGA demonstrating that it generates a
                 very small runtime penalty while providing almost as
                 good schedules as a full runtime approach.",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "FPGAs; hardware multitasking; Reconfigurable
                 architectures; runtime/design-time scheduling",
}
@Article{Garg:2008:SLT,
  author =       "Siddharth Garg and Diana Marculescu",
  title =        "System-level throughput analysis for process variation
                 aware multiple voltage-frequency island designs",
  journal =      j-TODAES,
  volume =       "13",
  number =       "4",
  pages =        "59:1--59:??",
  month =        sep,
  year =         "2008",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1391962.1391967",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 1 16:09:32 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The increasing variability in manufacturing process
                 parameters is expected to lead to significant
                 performance degradation in deep submicron technologies.
                 Multiple Voltage-Frequency Island (VFI) design styles
                 with fine-grained, process-variation aware clocking
                 have recently been shown to possess increased immunity
                 to manufacturing process variations. In this article,
                 we propose a theoretical framework that allows
                 designers to quantify the performance improvement that
                 is to be expected if they were to migrate from a fully
                 synchronous design to the proposed multiple VFI design
                 style. Specifically, we provide techniques to
                 efficiently and accurately estimate the probability
                 distribution of the execution rate (or throughput) of
                 both single and multiple VFI systems under the
                 influence of manufacturing process variations. Finally,
                 using an MPEG-2 encoder benchmark, we demonstrate how
                 the proposed analysis framework can be used by
                 designers to make architectural decisions such as the
                 granularity of VFI domain partitioning based on the
                 throughput constraints their systems are required to
                 satisfy.",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "Globally asynchronous locally synchronous;
                 manufacturing process variations; maximum cycle mean;
                 performance analysis; system-level design;
                 voltage-frequency islands",
}
@article{Ozturk:2008:APB,
  author = {Ozcan Ozturk and Mahmut Kandemir and Guangyu Chen},
  title = {Access pattern-based code compression for
    memory-constrained systems},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {60:1--60:??},
  articleno = {60},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391968},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {CFG; code access pattern; code compression; Embedded
    systems; memory optimization},
  abstract = {As compared to a large spectrum of performance
    optimizations, relatively less effort has been
    dedicated to optimize other aspects of embedded
    applications such as memory space requirements, power,
    real-time predictability, and reliability. In
    particular, many modern embedded systems operate under
    tight memory space constraints. One way of addressing
    this constraint is to compress executable code and data
    as much as possible. While researchers on code
    compression have studied efficient hardware and
    software based code compression strategies, many of
    these techniques do not take application behavior into
    account; that is, the same compression/decompression
    strategy is used irrespective of the application being
    optimized. This article presents an
    application-sensitive code compression strategy based
    on control flow graph (CFG) representation of the
    embedded program. The idea is to start with a memory
    image wherein all basic blocks of the application are
    compressed, and decompress only the blocks that are
    predicted to be needed in the near future. When the
    current access to a basic block is over, our approach
    also decides the point at which the block could be
    compressed. We propose and evaluate several compression
    and decompression strategies that try to reduce memory
    requirements without excessively increasing the
    original instruction cycle counts. Some of our
    strategies make use of profile data, whereas others are
    fully automatic. Our experimental evaluation using
    seven applications from the MediaBench suite and three
    large embedded applications reveals that the proposed
    code compression strategy is very successful in
    practice. Our results also indicate that working at a
    basic block granularity, as opposed to a procedure
    granularity, is important for maximizing memory space
    savings.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Baradaran:2008:CAM,
  author = {Nastaran Baradaran and Pedro C. Diniz},
  title = {A compiler approach to managing storage and memory
    bandwidth in configurable architectures},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {61:1--61:??},
  articleno = {61},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391969},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Compiler analysis; configurable architectures;
    high-level hardware synthesis; storage allocation and
    management},
  abstract = {Configurable architectures offer the unique
    opportunity of realizing hardware designs tailored to
    the specific data and computational patterns of an
    application code. Customizing the storage structures is
    becoming increasingly important in mitigating the
    continuing gap between memory latencies and internal
    computing speeds. In this article we describe and
    evaluate a compiler algorithm that maps the arrays of a
    loop-based computation to internal storage structures,
    either RAM blocks or discrete registers. Our objective
    is to minimize the overall execution time while
    considering the capacity and bandwidth constraints of
    the storage resources. The novelty of our approach lies
    in creating a single framework that combines high-level
    compiler techniques with lower-level scheduling
    information for mapping the data. We illustrate the
    benefits of our approach for a set of image/signal
    processing kernels using a Xilinx Virtex\TM{}
    Field-Programmable Gate Array (FPGA). Our algorithm
    leads to faster designs compared to the
    state-of-the-art {\em custom data layout\/} mapping
    technique, in some instances using less storage. When
    compared to hand-coded designs, our results are
    comparable in terms of execution time and resources,
    but are derived in a minute fraction of the design
    time.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Banerjee:2008:ASM,
  author = {Ansuman Banerjee and Pallab Dasgupta and P. P.
    Chakrabarti},
  title = {Auxiliary state machines + context-triggered
    properties in verification},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {62:1--62:??},
  articleno = {62},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391970},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  abstract = {Formal specifications of interface protocols between a
    design-under-test and its environment mostly consist of
    two types of correctness requirements, namely (a) a set
    of invariants that applies throughout the protocol
    execution and (b) a set of {\em context-triggered\/}
    properties that applies only when the protocol state
    belongs to a specific set of contexts. To model such
    requirements, an increasingly popular design choice in
    the assertion IP design community has been the use of
    abstract {\em context state machines\/} and
    state-oriented properties. In this paper, we formalize
    this modeling style and present algorithms for
    verifying such specifications. Specifically, we present
    a purely formal approach and a semi-formal approach for
    verifying such specifications. We demonstrate the use
    of this design style in modeling some of the industry
    standard protocol descriptions and present encouraging
    results.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Panda:2008:SBV,
  author = {S. K. Panda and Arnab Roy and P. P. Chakrabarti and
    Rajeev Kumar},
  title = {Simulation-based verification using {Temporally
    Attributed Boolean Logic}},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {63:1--63:??},
  articleno = {63},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391971},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Bus verification; instruction semantics verification;
    interrupt testing; offline-online verification
    algorithm; simulation based verification; temporal
    logic; timing verification},
  abstract = {We propose a specification logic called Temporally
    Attributed Boolean (TAB) Logic for Assertion Based
    Verification, which allows us to: (i) represent
    assertions succinctly, (ii) incorporate
    data-orientation and (iii) associate timing to design
    intentions. TAB Logic allows us to write specifications
    functionally linking system variables from different
    temporal contexts. We present examples to show the
    motivation for this logic especially in the context of
    high level modeling of complex real time systems. We
    formally define TAB Logic, formulate the problem of
    verification on a simulation trace and present
    efficient algorithms to check TAB assertions, both
    offline and online. We present results of application
    of TAB Logic for Instruction Semantics and Bus
    Transaction Verification of a bus integrated pipelined
    processor core implementation. We also employ TAB Logic
    to validate the Interrupt mode behavior of the
    processor core implementation. Further, we show the
    utility of TAB Logic in fault detection. Finally, we
    demonstrate the applicability of TAB Logic in the
    domain of simulation based verification of analog
    circuits like Operational Amplifiers and DC-DC
    Converters. We finally discuss the limitations of TAB
    logic and conclude.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Wang:2008:LAS,
  author = {Sying-Jyan Wang and Kuo-Lin Peng and Kuang-Cyun Hsiao
    and Katherine Shu-Min Li},
  title = {Layout-aware scan chain reorder for launch-off-shift
    transition test coverage},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {64:1--64:??},
  articleno = {64},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391972},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {scan chain ordering; Scan test; test generation;
    transition faults},
  abstract = {Launch-off-shift (LOS) is a popular delay test
    technique for scan-based designs. However, it is
    usually not possible to achieve good delay fault
    coverage in LOS test due to conflicts in test vectors.
    In this article, we propose a layout-based scan chain
    ordering method to improve fault coverage for LOS test
    with limited routing overhead. A fast and effective
    algorithm is used to eliminate conflicts in test
    vectors while at the same time restrict the extra scan
    chain routing. This approach provides many advantages.
    (1) The proposed method can improve delay fault
    coverage for LOS test. (2) With layout information
    taken into account, the routing penalty is limited, and
    thus the impact on circuit performance will not be
    significant. Experimental results show that the
    proposed LOS test method achieves about the same level
    of delay fault coverage as enhanced scan does, while
    the average scan chain wire length is about 2.2 times
    of the shortest scan chain.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Moiseev:2008:TAP,
  author = {Konstantin Moiseev and Avinoam Kolodny and Shmuel
    Wimer},
  title = {Timing-aware power-optimal ordering of signals},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {65:1--65:??},
  articleno = {65},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391973},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {interconnect optimization; power optimization; Wire
    ordering; wire spacing},
  abstract = {A computationally efficient technique for reducing
    interconnect active power in VLSI systems is presented.
    Power reduction is accomplished by simultaneous wire
    spacing and net ordering, such that cross-capacitances
    between wires are optimally shared. The existence of a
    unique power-optimal wire order within a bundle is
    proven, and a method to construct this order is
    derived. The optimal order of wires depends only on the
    activity factors of the underlying signals; hence, it
    can be performed prior to spacing optimization. By
    using this order of wires, optimality of the combined
    solution is guaranteed (as compared with any other
    ordering and spacing of the wires). Timing-aware power
    optimization is enabled by simultaneously considering
    timing criticality weights and activity factors for the
    signals. The proposed algorithm has been applied to
    various interconnect layouts, including wire bundles
    from high-end microprocessor circuits in 65 nm
    technology. Interconnect power reduction of 17\% on
    average has been observed in such bundles.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Lu:2008:EDI,
  author = {Chao-Hung Lu and Hung-Ming Chen and Chien-Nan Jimmy
    Liu},
  title = {Effective decap insertion in area-array {SoC}
    floorplan design},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {66:1--66:??},
  articleno = {66},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391974},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {decap insertion; floorplan; Power supply noise},
  abstract = {As VLSI technology enters the nanometer era, supply
    voltages continue to drop due to the reduction of power
    dissipation, but it makes power integrity problems even
    worse. Employing decoupling capacitances (decaps) in
    floorplan stage is a common approach to alleviating
    supply noise problems. Previous researches overestimate
    the decap budget and do not fully utilize the empty
    space of the floorplan. A floorplan usually has a lot
    of available space that can be used to insert the decap
    without increasing the floorplan area. Therefore, the
    goal of this work is to develop a better model to
    calculate the required decap to solve the power supply
    noise problem in area-array based designs, and increase
    the usage of available space in the floorplan to reduce
    the area overhead caused by decap insertion. The
    experimental results of this work are encouraging.
    Compared with previous approaches, our methodology
    reduces 38\% of the decap budget in average for MCNC
    benchmarks but can still meet the power supply noise
    requirements. The final floorplan areas with decap are
    also smaller than the numbers reported in previous
    works.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Moffitt:2008:CDF,
  author = {Michael D. Moffitt and Jarrod A. Roy and Igor L.
    Markov and Martha E. Pollack},
  title = {Constraint-driven floorplan repair},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {67:1--67:??},
  articleno = {67},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391975},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {constraints; Floorplanning; legalization},
  abstract = {In this work, we propose a new and efficient approach
    to the {\em floorplan repair\/} problem, where violated
    design constraints are satisfied by applying small
    changes to an existing rough floorplan. Such a
    floorplan can be produced by a human designer, a
    scalable placement algorithm, or result from
    engineering adjustments to an existing floorplan. In
    such cases, overlapping modules must be separated, and
    others may need to be repositioned to satisfy
    additional requirements. Our algorithmic framework uses
    an expressive graph-based encoding of constraints which
    can reflect fixed-outline, region, proximity and
    alignment constraints. By tracking the implications of
    existing constraints, we resolve violations by imposing
    gradual modifications to the floorplan, in an attempt
    to preserve the characteristics of its initial design.
    Empirically, our approach is effective at removing
    overlaps and repairing violations that may occur when
    design constraints are acquired and imposed
    dynamically.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Ozdal:2008:ORA,
  author = {Muhammet Mustafa Ozdal and Martin D. F. Wong and
    Philip S. Honsinger},
  title = {Optimal routing algorithms for rectilinear pin
    clusters in high-density multichip modules},
  journal = j-TODAES,
  volume = {13},
  number = {4},
  pages = {68:1--68:??},
  articleno = {68},
  month = sep,
  year = {2008},
  doi = {https://doi.org/10.1145/1391962.1391976},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Escape routing; multi-chip modules; network flow},
  abstract = {As the circuit densities and transistor counts are
    increasing, the package routing problem is becoming
    more and more challenging. In this article, we study an
    important routing problem encountered in typical
    high-end MCM designs: routing within dense pin
    clusters. Pin clusters are often formed by pins that
    belong to the same functional unit or the same data
    bus, and can become bottlenecks in terms of overall
    routability. Typically, these clusters have irregular
    shapes, which can be approximated with rectilinear
    convex boundaries. Since such boundaries have often
    irregular shapes, a traditional escape routing
    algorithm may give unroutable solutions. In this
    article, we study how the positions of escape terminals
    on a convex boundary affect the overall routability.
    For this purpose, we propose a set of necessary and
    sufficient conditions to model routability outside a
    rectilinear convex boundary. Given an escape routing
    solution, we propose an optimal algorithm to select the
    maximal subset of nets that are routable outside the
    boundary. After that, we focus on an integrated
    approach to consider routability constraints (outside
    the boundary) during the actual escape routing
    algorithm. Here, we propose an optimal algorithm to
    find the best escape routing solution that satisfies
    all routability constraints. Our experiments
    demonstrate that we can reduce the number of layers by
    17\% on the average, by using this integrated
    methodology.},
  bibdate = {Wed Oct 1 16:09:32 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@Article{Keinert:2009:SAE,
author = "Joachim Keinert and Martin Streub{\"u}hr and Thomas
Schlichter and Joachim Falk and Jens Gladigau and
Christian Haubelt and J{\"u}rgen Teich and Michael
Meredith",
title = "{SystemCoDesigner} --- an automatic {ESL} synthesis
approach by design space exploration and behavioral
synthesis for streaming applications",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455230",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With increasing design complexity, the gap from ESL
(Electronic System Level) design to RTL synthesis
becomes more and more crucial to many industrial
projects. Although several behavioral synthesis tools
exist to automatically generate synthesizable RTL code
from C/C++/SystemC-based input descriptions and
software generation for embedded processors is
automated as well, an efficient ESL synthesis
methodology combining both is still missing. This
article presents SystemCoDesigner, a novel
SystemC-based ESL tool to automatically optimize a
hardware/software SoC (System on Chip) implementation
with respect to several objectives. Starting from a
SystemC behavioral model, SystemCoDesigner
automatically extracts the mathematical model, performs
a behavioral synthesis step, and explores the
multiobjective design space using state-of-the-art
multiobjective optimization algorithms. During design
space exploration, a single design point is evaluated
by simulating highly accurate performance models, which
are automatically generated from the SystemC behavioral
model and the behavioral synthesis results. Moreover,
SystemCoDesigner permits the automatic generation of
bit streams for FPGA targets from any previously
optimized SoC implementation. Thus SystemCoDesigner is
the first fully automated ESL synthesis tool providing
a correct-by-construction generation of
hardware/software SoC implementations. As a case study,
a model of a Motion-JPEG decoder was automatically
optimized and implemented using SystemCoDesigner.
Several synthesized SoC variants based on this model
show different tradeoffs between required hardware
costs and achieved system throughput, ranging from
software-only solutions to pure hardware
implementations that reach real-time performance for
QCIF streams on a 50 MHz FPGA.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "hardware/software codesign; System design",
}
@article{Hansson:2009:CTC,
  author = {Andreas Hansson and Kees Goossens and Marco Bekooij
    and Jos Huisken},
  title = {{CoMPSoC}: a template for composable and predictable
    multi-processor system on chips},
  journal = j-TODAES,
  volume = {14},
  number = {1},
  pages = {2:1--2:??},
  articleno = {2},
  month = jan,
  year = {2009},
  doi = {https://doi.org/10.1145/1455229.1455231},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Composable; model of computation; network on chip;
    predictable; system on chip},
  abstract = {A growing number of applications, often with firm or
    soft real-time requirements, are integrated on the same
    System on Chip, in the form of either hardware or
    software intellectual property. The applications are
    started and stopped at run time, creating different
    use-cases. Resources, such as interconnects and
    memories, are shared between different applications,
    both within and between use-cases, to reduce silicon
    cost and power consumption.\par
    The functional and temporal behaviour of the
    applications is verified by simulation and formal
    methods. Traditionally, designers resort to monolithic
    verification of the system as whole, since the
    applications interfere in shared resources, and thus
    affect each other's behaviour. Due to interference
    between applications, the integration and verification
    complexity grows exponentially in the number of
    applications, and the task to verify correct behaviour
    of concurrent applications is on the system designer
    rather than the application designers.\par
    In this work, we propose a Composable and Predictable
    Multi-Processor System on Chip (CoMPSoC) platform
    template. This scalable hardware and software template
    removes all interference between applications through
    resource reservations. We demonstrate how this enables
    a divide-and-conquer design strategy, where all
    applications, potentially using different programming
    models and communication paradigms, are developed and
    verified independently of one another. Performance is
    analyzed per application, using state-of-the-art
    dataflow techniques or simulation, depending on the
    requirements of the application. These results still
    apply when the applications are integrated onto the
    platform, thus separating system-level design and
    application design.},
  bibdate = {Mon Jan 26 18:12:50 MST 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Gheorghita:2009:SSB,
  author = {Stefan Valentin Gheorghita and Martin Palkovic and
    Juan Hamers and Arnout Vandecappelle and Stelios
    Mamagkakis and Twan Basten and Lieven Eeckhout and Henk
    Corporaal and Francky Catthoor and Frederik Vandeputte
    and Koen {De Bosschere}},
  title = {System-scenario-based design of dynamic embedded
    systems},
  journal = j-TODAES,
  volume = {14},
  number = {1},
  pages = {3:1--3:??},
  articleno = {3},
  month = jan,
  year = {2009},
  doi = {https://doi.org/10.1145/1455229.1455232},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Design methodology; dynamic nature; embedded systems;
    energy reduction; real-time systems; system scenarios},
  abstract = {In the past decade, real-time embedded systems have
    become much more complex due to the introduction of a
    lot of new functionality in one application, and due to
    running multiple applications concurrently. This
    increases the dynamic nature of today's applications
    and systems, and tightens the requirements for their
    constraints in terms of deadlines and energy
    consumption. State-of-the-art design methodologies try
    to cope with these novel issues by identifying several
    most used cases and dealing with them separately,
    reducing the newly introduced complexity. This article
    presents a generic and systematic design-time/run-time
    methodology for handling the dynamic nature of modern
    embedded systems, which can be utilized by existing
    design methodologies to increase their efficiency. It
    is based on the concept of {\em system scenarios},
    which group system behaviors that are similar from a
    multidimensional cost perspective --- such as resource
    requirements, delay, and energy consumption --- in such
    a way that the system can be configured to exploit this
    cost similarity. At design-time, these scenarios are
    individually optimized. Mechanisms for predicting the
    current scenario at run-time, and for switching between
    scenarios, are also derived. This design trajectory is
    augmented with a run-time calibration mechanism, which
    allows the system to learn on-the-fly during its
    execution, and to adapt itself to the current input
    stimuli, by extending the scenario set, changing the
    scenario definitions, and both the prediction and
    switching mechanisms. To show the generality of our
    methodology, we show how it has been applied on four
    very different real-life design problems. In all
    presented case studies, substantial energy reductions
    were obtained by exploiting scenarios.},
  bibdate = {Mon Jan 26 18:12:50 MST 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Xu:2009:STA,
  author = {Qiang Xu and Yubin Zhang and Krishnendu Chakrabarty},
  title = {{SOC} test-architecture optimization for the testing
    of embedded cores and signal-integrity faults on
    core-external interconnects},
  journal = j-TODAES,
  volume = {14},
  number = {1},
  pages = {4:1--4:??},
  articleno = {4},
  month = jan,
  year = {2009},
  doi = {https://doi.org/10.1145/1455229.1455233},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Core-based system-on-chip; interconnect testing; test
    scheduling; test-access mechanism (TAM)},
  abstract = {The test time for core-external interconnect shorts
    and opens is typically much less than that for
    core-internal logic. Therefore, prior work on
    test-infrastructure design for core-based
    system-on-a-chip (SOC) has mainly focused on minimizing
    the test time for core-internal logic. However, as
    feature sizes shrink for newer process technologies,
    the test time for signal integrity (SI) faults on
    interconnects cannot be neglected. The test time for SI
    faults can be comparable to, or even larger than, the
    test time for the embedded cores. We investigate the
    impact of interconnect SI tests on SOC
    test-architecture design and optimization. A compaction
    method for SI faults and algorithms for
    test-architecture optimization are also presented.
    Experimental results for the ITC'02 benchmarks show
    that the proposed approach can significantly reduce the
    overall testing time for core-internal logic and
    core-external interconnects.},
  bibdate = {Mon Jan 26 18:12:50 MST 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@article{Jin:2009:GND,
  author = {Zhong-Yi Jin and Curt Schurgers and Rajesh K. Gupta},
  title = {A gateway node with duty-cycled radio and processing
    subsystems for wireless sensor networks},
  journal = j-TODAES,
  volume = {14},
  number = {1},
  pages = {5:1--5:??},
  articleno = {5},
  month = jan,
  year = {2009},
  doi = {https://doi.org/10.1145/1455229.1455234},
  coden = {ATASFO},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Embedded systems; gateway; power savings; sensor
    nodes},
  abstract = {Wireless sensor nodes are increasingly being tasked
    with computation and communication intensive functions
    while still subject to constraints related to energy
    availability. On these embedded platforms, once all low
    power design techniques have been explored,
    duty-cycling the various subsystems remains the primary
    option to meet the energy and power constraints. This
    requires the ability to provide spurts of high MIPS and
    high bandwidth connections. However, due to the large
    overheads associated with duty-cycling the computation
    and communication subsystems, existing high performance
    sensor platforms are not efficient in supporting such
    an option. In this article, we present the design and
    optimizations taken in a wireless gateway node (WGN)
    that bridges data from wireless sensor networks to
    Wi-Fi networks in an on-demand basis. We discuss our
    strategies to reduce duty-cycling related costs by
    partitioning the system and by reducing the amount of
    time required to activate or deactivate the
    high-powered components. We compare the design choices
    and performance parameters with those made in the Intel
    {\em Stargate\/} platform to show the effectiveness of
    duty-cycling on our platform. We have built a working
    prototype, and the experimental results with two
    different power management schemes show significant
    reductions in latency and average power consumption
    compared to the {\em Stargate}. The WGN running our
    power-gating scheme performs about six times better in
    terms of average system power consumption than the {\em
    Stargate\/} running the suspend-system scheme for large
    working-periods where the active power dominates. For
    short working-periods where the transition
    (enable/disable) power becomes dominant, we perform up
    to seven times better. The comparative performance of
    our system is even greater when the sleep power
    dominates.},
  bibdate = {Mon Jan 26 18:12:50 MST 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
}
@Article{Wu:2009:EER,
author = "Chin-Hsien Wu",
title = "An energy-efficient {I/O} request mechanism for
multi-bank flash-memory storage systems",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455235",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Emerging critical issues for flash-memory storage
systems, especially with regard to implementation
within many embedded systems, are the programmed I/O
nature of data transfers and their energy-efficient
nature. We propose an I/O request mechanism in the
Memory-Technology-Device (MTD) layer to exploit the
programmed I/O-based data transfers for flash-memory
storage systems. We propose to revise the waiting
function in the Memory-Technology-Device (MTD) layer to
relieve the microprocessor from busy-waiting, in order
to make more CPU cycles available for other tasks. An
energy-efficient mechanism based on the I/O request
mechanism is also presented for multi-bank flash-memory
storage systems, which particularly focuses on
switching the power state of each flash-memory bank. We
demonstrate that the energy-efficient I/O request
mechanism not only saves more CPU cycles to execute
other tasks, but also reduces the energy consumption of
flash-memory, based on experiments incorporating
realistic system workloads.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "embedded systems; energy-efficient; Flash Memory;
programmed I/O; storage systems",
}
@Article{Dontharaju:2009:DAP,
author = "Swapna Dontharaju and Shenchih Tung and James T. Cain
and Leonid Mats and Marlin H. Mickle and Alex K.
Jones",
title = "A design automation and power estimation flow for
{RFID} systems",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455236",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "While RFID has become a ubiquitous technology, there
is still a need for RFID systems with different
capabilities, protocols, and features depending on the
application. This article describes a design automation
flow and power estimation technique for fast
implementation and design feedback of new RFID systems.
Physical layer features are described using {\em
waveform features}, which are used to automatically
generate physical layer encoding and decoding hardware
blocks. {\em RFID primitives\/} to be supported by the
tag are enumerated with {\em RFID macros\/} and the
behavior of each primitive is specified using ANSI-C
within the template to automatically generate the tag
controller. Case studies implementing widely used
standards such as ISO 18000 Part 7 and ISO 18000 Part
6C using this automation technique are presented. The
power macromodeling flow demonstrated here is shown to
be within 5\% to 10\% accuracy, while providing results
100 times faster than traditional methods. When
eliminating the need for certain features of ISO 18000
Part 6C, the design flow shows that the power required
by the implementation is reduced by nearly 50\%.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design automation; low-power; prototyping; RFID",
}
@Article{Dasdan:2009:PEA,
author = "Ali Dasdan",
title = "Provably efficient algorithms for resolving temporal
and spatial difference constraint violations",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455237",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A system of difference constraints is a formal model
of temporal and spatial constraints in many areas such
as scheduling, constraint satisfaction, and layout
compaction. During construction of such a system,
constraint violations often arise, and they need to be
resolved. Previous algorithms for this task fall into
two groups: those algorithms that are fast but cannot
resolve all violations, and those algorithms that can
resolve all violations but are exponentially slow. We
propose the first algorithms that are fast as well as
able to resolve all violations. Moreover, unlike the
previous algorithms, our algorithms support the
ordering of violations using their inherent criticality
or user-defined priority. We provably and
experimentally justify the efficiency and efficacy of
our algorithms.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Behavioral synthesis; constraint satisfaction;
interface timing; layout compaction; multimedia
synchronization; rate analysis; real-time systems;
scheduling; timing constraints",
}
@Article{Sinha:2009:DIC,
author = "Arnab Sinha and Pallab Dasgupta and Bhaskar Pal and
Sayantan Das and Prasenjit Basu and P. P. Chakrabarti",
title = "Design intent coverage revisited",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455238",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "{\em Design intent coverage\/} is a formal methodology
for analyzing the gap between a formal architectural
specification of a design and the formal functional
specifications of the component RTL blocks of the
design. In this article we extend the design intent
coverage methodology to hybrid specifications
containing both state-machines and formal properties.
We demonstrate the benefits of this extension in two
domains of considerable recent interest, namely (a) the
use of auxiliary state-machines in formal
specifications, and (b) the use of modest sized RTL
blocks in the design intent coverage analysis.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Design Intent Coverage",
}
@Article{Yang:2009:MCS,
author = "Zijiang Yang and Chao Wang and Aarti Gupta and Franjo
Ivan{\v{c}}i{\'c}",
title = "Model checking sequential software programs via mixed
symbolic analysis",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "10:1--10:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455239",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present an efficient symbolic search algorithm for
software model checking. Our algorithms perform
word-level reasoning by using a combination of decision
procedures in Boolean and integer and real domains, and
use novel symbolic search strategies optimized
specifically for sequential programs to improve
scalability. Experiments on real-world C programs show
that the new symbolic search algorithms can achieve
several orders-of-magnitude improvements over existing
methods based on bit-level (Boolean) reasoning.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "binary decision diagram; composite symbolic formula;
image computation; Model checking; Presburger
arithmetic; reachability analysis",
}
@Article{Mehta:2009:ICH,
author = "Gayatri Mehta and Justin Stander and Mustafa Baz and
Brady Hunsaker and Alex K. Jones",
title = "Interconnect customization for a hardware fabric",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "11:1--11:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455240",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article describes several multiplexer-based
interconnection strategies designed to improve energy
consumption of stripe-based coarse-grain reconfigurable
fabrics. Application requirements for the architecture
as well as two dense subgraphs are extracted from a
suite of signal and image processing benchmarks. These
statistics are used to drive the strategy of the
composition of multiplexer-based interconnect. The
article compares interconnects that are fully connected
between stripes, those with a cardinality of 8:1 to
4:1, and extensions that provide a 5:1 cardinality,
limited 6:1 cardinality, and hybrids between 5:1 and
3:1 cardinalities. Additionally, dedicated vertical
routes are considered replacing some computational
units with dedicated pass-gates. Using a fabric
interconnect model (FIM) written in XML, we demonstrate
that fabric instances and mappers can be automatically
generated using a Web-based design flow. Upon testing
these instances, we found that using an 8:1 cardinality
interconnect with 33\% of the computational units
replaced with dedicated pass-gates provided the best
energy versus mappability tradeoff, resulting in a 50\%
energy improvement over fully connected rows and 20\%
energy improvement over an 8:1 cardinality interconnect
without dedicated vertical routes.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "architecture; computer-aided design; demonstrable;
hardware fabric; low-energy; Reconfigurable",
}
@Article{Sham:2009:CPE,
author = "Chiu-Wing Sham and Evangeline F. Y. Young and Jingwei
Lu",
title = "Congestion prediction in early stages of physical
design",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "12:1--12:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455241",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Routability optimization has become a major concern in
physical design of VLSI circuits. Due to the recent
advances in VLSI technology, interconnect has become a
dominant factor of the overall performance of a
circuit. In order to optimize interconnect cost, we
need a good congestion estimation method to predict
routability in the early designing stages. Many
congestion models have been proposed but there's still
a lot of room for improvement. Besides, routers will
perform rip-up and reroute operations to prevent
overflow, but most models do not consider this case.
The outcome is that the existing models will usually
underestimate the routability. In this paper, we have a
comprehensive study on our proposed congestion models.
Results show that the estimation results of our
approaches are always more accurate than the previous
congestion models.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Estimation; floorplanning; placement",
}
@Article{Zhu:2009:ESA,
author = "Yi Zhu and Yuanfang Hu and Michael B. Taylor and
Chung-Kuan Cheng",
title = "Energy and switch area optimizations for {FPGA} global
routing architectures",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "13:1--13:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455242",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Low energy and small switch area usage are two
important design objectives in FPGA global routing
architecture design. This article presents an improved
MCF model based CAD flow that performs aggressive
optimizations, such as topology and wire style
optimization, to reduce the energy and switch area of
FPGA global routing architectures. The experiments show
that when compared to traditional mesh architecture,
the optimized FPGA routing architectures achieve up to
10\% to 15\% energy savings and up to 20\% switch area
savings in average for a set of seven benchmark
circuits.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "FPGA; global routing; low power",
}
@Article{Huang:2009:OPR,
author = "Shih-Hsu Huang and Chia-Ming Chang and Yow-Tyng Nieh",
title = "Opposite-phase register switching for peak current
minimization",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "14:1--14:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455243",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In a synchronous sequential circuit, huge current
peaks are often observed at the moment of clock
transition (since all registers are clocked). Previous
works focus on reducing the number of switching
registers. However, even though the switching registers
are the same, different combinations of switching
directions still result in different peak currents.
Based on that observation, in this article, we propose
an ECO (engineering change order) approach to minimize
the peak current by considering the switching
directions of registers. Our approach is well suitable
for reducing the peak current in IC testing.
Experimental data consistently show that our approach
works well in practice.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "IC testing; Logic synthesis; peak current; sequential
circuit synthesis",
}
@Article{Lin:2009:SCD,
author = "Yen-Chun Lin and Li-Ling Hung",
title = "Straightforward construction of depth-size optimal,
parallel prefix circuits with fan-out 2",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "15:1--15:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455244",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Prefix computation is used in various areas and is
considered as a primitive operation. Parallel prefix
circuits are parallel prefix algorithms on the
combinational circuit model. The depth of a prefix
circuit is a measure of its processing time; smaller
depth implies faster computation. The size of a prefix
circuit is the number of operation nodes in it. Smaller
size implies less power consumption, less VLSI area,
and less cost. A prefix circuit with $n$ inputs is
depth-size optimal if its depth plus size equals $ 2 n
- 2$. A circuit with a smaller fan-out is in general
faster and occupies less VLSI area. To be of practical
use, the depth and fan-out of a prefix circuit should
be small. In this paper, a family of depth-size
optimal, parallel prefix circuits with fan-out 2 is
presented. This family of prefix circuits is easier to
construct and more amenable to automatic synthesis than
two other families of the same type, although the three
families have the same minimum depth among all
depth-size optimal prefix circuits with fan-out 2. The
balanced structure of the new family is also a merit.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Depth-size optimal; fan-out; parallel prefix
circuits",
}
@Article{Kahng:2009:LAA,
author = "Andrew B. Kahng and Chul-Hong Park and Puneet Sharma
and Qinke Wang",
title = "Lens aberration aware placement for timing yield",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "16:1--16:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455245",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Process variations due to lens aberrations are to a
large extent systematic, and can be modeled for
purposes of analyses and optimizations in the design
phase. Traditionally, variations induced by lens
aberrations have been considered random due to their
small extent. However, as process margins reduce, and
as improvements in reticle enhancement techniques
control variations due to other sources with increased
efficacy, lens aberration-induced variations gain
importance. For example, our experiments indicate that
delays of most cells in the Artisan TSMC 90nm library
are affected by 2--8\% due to lens aberration.
Aberration-induced variations are systematic and depend
on the location in the lens field. In this article, we
first propose an aberration-aware timing analysis flow
that accounts for aberration-induced cell delay
variations. We then propose an aberration-aware
timing-driven analytical placement approach that
utilizes the predictable slow and fast regions created
on the chip due to aberration to improve cycle time. We
study the dependence of our improvement on chip size,
as well as use of the technique along with field
blading which allows partial reticle exposure. We
evaluate our technique on two testcases, {\em AES\/}
and {\em JPEG\/} implemented in 90nm technology. The
proposed technique reduces cycle time by 4.322\% (80ps)
at the cost of 1.587\% increase in trial-routed
wirelength for AES. On JPEG, we observe a cycle time
reduction of 5.182\% (132ps) at the cost of 1.095\%
increase in trial-routed wirelength.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design for manufacturing; Layout; lithography; timing
yield",
}
@Article{Chien:2009:SMV,
author = "Chih-Da Chien and Cheng-An Chien and Jui-Chin Chu and
Jiun-In Guo and Ching-Hwa Cheng",
title = "A {252Kgates\slash 4.9Kbytes SRAM\slash 71mW}
multistandard video decoder for high definition video
applications",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "17:1--17:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455246",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article proposes a low-cost, low-power
multistandard video decoder for high definition (HD)
video applications. The proposed design supports
multiple-standard (JPEG baseline, MPEG-1/2/4 Simple
Profile (SP), and H.264 Baseline Profile (BP)) video
decoding through interactive parsing control and common
parameter bus interface. In order to reduce hardware
cost, the shared adder-based structure and reusable
data management are proposed to achieve hardware
sharing and reduce internal memory size, respectively.
In addition, the proposed design is optimized through
reducing memory bandwidth by increasing both data reuse
amount and burst length of memory access as well as
eliminating cycle overhead in data access for
supporting HD video decoding with single AHB-based SDR
memory. The proposed 252Kgates/4.9kB/71mW/0.13$ \mu $m
multi-standard video decoder reduces 72\% in gate count
and 87\% in power consumption as compared to the
state-of-the-art design, when operating at 120MHz for
real-time HD1080 video decoding with single AHB-based
SDR memory.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "H.264; MPEG; Video decoder",
}
@Article{Reviriego:2009:EED,
author = "Pedro Reviriego and Juan Antonio Maestro",
title = "Efficient error detection codes for multiple-bit upset
correction in {SRAMs} with {BICS}",
journal = j-TODAES,
volume = "14",
number = "1",
pages = "18:1--18:??",
month = jan,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1455229.1455247",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 26 18:12:50 MST 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Memories are one of the most widely used elements in
electronic systems, and their reliability when exposed
to Single Events Upsets (SEUs) has been studied
extensively. As transistor sizes shrink, Multiple Bits
Upsets (MBUs) are becoming an increasingly important
factor in the reliability of memories exposed to
radiation effects. To address this issue, Built-in
Current Sensors (BICS) have recently been applied in
conjunction with Single Error Correction/Double Error
Detection (SEC-DED) codes to protect memories from
MBUs. In this article, this approach is taken one step
further, proposing specific codes optimized to be
combined with BICS to provide protection against MBUs
in memories. By exploiting the locality of errors
within an MBU and the error detection and location
capabilities of BICS, the proposed codes result in both
a better protection level and a reduced cost compared
with the existing SEC-DED approach.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "error correcting codes; Fault tolerant memory;
high-level protection technique; protection against
radiation",
}
@Article{Avnit:2009:PCC,
author = "K. Avnit and V. D'Silva and A. Sowmya and S. Ramesh
and S. Parameswaran",
title = "Provably correct on-chip communication: a formal
approach to automatic protocol converter synthesis",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "19:1--19:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497562",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Hardware module reuse is a standard solution to the
problems of increasing complexity of chip architectures
and pressure to reduce time to market. In the absence
of a single module interface standard, predesigned
modules for ``plug-and-play'' usually require a
converter between incompatible interface protocols.
Current approaches to automatic synthesis of protocol
converters mostly lack formal foundations and either
employ abstractions far removed from the HDL
implementation level or grossly simplify the structure
of the protocols considered. This work presents a
state-machine-based formalism for modeling bus-based
communication protocols and a notion of protocol
compatibility and of correct conversion between
incompatible protocols. This formalism is used to
derive algorithms for checking protocol compatibility
and for provably correct, automatic converter
synthesis. Experiments with automatic converter
synthesis between different configurations of widely
used commercial bus protocols, such as AMBA AHB, ASB,
APB, and the Open Core Protocol (OCP) are discussed.
The work here is unique in its combination of a
completely formal approach and the use of a low
abstraction level that enables precise modeling of
protocol characteristics that is also close to HDL.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "automatic design; converter synthesis; protocol
compatibility; System-on-chip",
}
@Article{Pasricha:2009:SLP,
author = "Sudeep Pasricha and Young-Hwan Park and Nikil Dutt and
Fadi J. Kurdahi",
title = "System-level {PVT} variation-aware power exploration
of on-chip communication architectures",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "20:1--20:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497563",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the shift towards deep submicron (DSM)
technologies, the increase in leakage power and the
adoption of power-aware design methodologies have
resulted in potentially significant variations in power
consumption under different process, voltage, and
temperature (PVT) corners. In this article, we first
investigate the impact of PVT corners on power
consumption at the system-on-chip (SoC) level,
especially for the on-chip communication
infrastructure. Given a target technology library, we
then show how it is possible to ``scale up'' and
abstract the PVT variability at the system level,
allowing characterization of the PVT-aware design space
early in the design flow. We conducted several
experiments to estimate power for PVT corner cases, at
the gate level, as well as at the higher system level.
Our preliminary results are very interesting, and
indicate that (i) there are significant variations in
power consumption across PVT corners; and (ii) the
PVT-aware power estimation problem may be amenable to a
reasonably simple abstraction at the system level.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "digital systems; high-level synthesis; on-chip
communication architectures; performance exploration;
power estimation; PVT variation",
}
@Article{Mukhopadhyay:2009:IAA,
author = "Rajdeep Mukhopadhyay and S. K. Panda and Pallab
Dasgupta and John Gough",
title = "Instrumenting {AMS} assertion verification on
commercial platforms",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "21:1--21:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497564",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The industry trend appears to be moving towards
designs that integrate large digital circuits with
multiple analog/RF (radio frequency) interfaces. In the
verification of these large integrated circuits, the
number of nets that need to be monitored has been
growing rapidly. Consequently, the mixed-signal design
community has been feeling the need for AMS (Analog and
Mixed Signal) assertions that can automatically monitor
conformance with expected time-domain behavior and help
in debugging deviations from the design intent. The
main challenges in providing this support are (a)
developing AMS assertion languages or AMS verification
libraries, and (b) instrumenting existing commercial
simulators to support assertion verification during
simulation. In this article, we report two approaches:
the first extends the {\em Open Verification Library\/}
(OVL) to the AMS domain by integrating a new collection
of AMS verification libraries; while the second extends
{\em SystemVerilog Assertions\/} (SVA) by augmenting
analog predicates into SVA. We demonstrate the use of
AMS-OVL on the Cadence Virtuoso environment while
emphasizing that our libraries can work in any
environment that supports Verilog and Verilog-A. We
also report the development of tool support for AMS-SVA
using a combination of Cadence NCSIM and Synopsys VCS.
We demonstrate the utility of both approaches on the
verification of LP3918, an integrated power management
unit (PMU) from National Semiconductors. We believe
that in the absence of existing EDA (Electronic Design
Automation) tools for AMS assertion verification, the
proposed approaches of integrating our libraries and
our tool sets with existing commercial simulators will
be of considerable and immediate practical value.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Assertion; integrated mixed signal design; OVL;
simulation; SVA; verification library",
}
@Article{Palkovic:2009:TOL,
author = "Martin Palkovic and Francky Catthoor and Henk
Corporaal",
title = "Trade-offs in loop transformations",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "22:1--22:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497565",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Nowadays, multimedia systems deal with huge amounts of
memory accesses and large memory footprints. To
alleviate the impact of these accesses and reduce the
memory footprint, high-level memory exploration and
optimization techniques have been proposed. These
techniques try to more efficiently utilize the memory
hierarchy. An important step in these optimization
techniques are loop transformations (LT). They have a
crucial effect on later data memory footprint
optimization steps and code generation. However, the
state-of-the-art work has focused only on individual
objectives. The main one in literature involves
improving the locality of data accesses, and thus
reducing the data memory footprint. It does not
consider the trade-offs in the LT step in relation to
successive optimization steps. Therefore, it is not
globally efficient in mapping the application on the
target platform.\par
In this article we will discuss several trade-offs
during the loop transformations. To our knowledge, we
are the first ones considering these global trade-offs.
Previous work always gave mostly one solution, having
the best locality and thus the optimized memory
footprint, even though some research in two-dimensional
trade-offs in this area exists as well. We start from
this state-of-the-art solution with minimal footprint.
We show that by sacrificing the footprint, we can
obtain gains in data reuse (crucial for energy
reduction) and reduce the control-flow complexity. We
demonstrate our approach on a real-life application,
namely the QSDPCM video coder. At the end, we show that
considering trade-offs for this application leads to
16\% energy reduction in a two-layer memory subsystem
and 10\% cycle reduction on the ARM platform.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "cost components; Data transfer and storage
exploration; loop transformations; optimization;
trade-offs",
}
@Article{Fummi:2009:CMH,
author = "Franco Fummi and Mirko Loghi and Massimo Poncino and
Graziano Pravadelli",
title = "A cosimulation methodology for {HW\slash SW}
validation and performance estimation",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "23:1--23:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497566",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Cosimulation strategies allow us to simulate and
verify HW/SW embedded systems before the real platform
is available. In this field, there is a large variety
of approaches that rely on different communication
mechanisms to implement an efficient interface between
the SW and the HW simulators. However, the literature
lacks a comprehensive methodology which addresses the
need for integrating and synchronizing heterogeneous
simulators, like, for example, the SystemC simulation
kernel for HW modules and an instruction set simulator
for SW applications, without being intrusive for the HW
and SW descriptions involved in the simulation. In this
context, this article presents, compares, and
integrates in a system-level framework two different
co-simulation strategies for modeling, analyzing, and
validating the performance of a HW/SW embedded system.
Moreover, for both of them, a mechanism is proposed to
provide an accurate time synchronization of the HW/SW
communication. The first strategy is intended to
provide an early cosimulation environment where HW/SW
interaction can be validated without involving the
operating system. The communication is implemented
between a single SW task and a SystemC description of
an HW module by exploiting the features of the remote
debugging interface of a debugger (the GNU GDB), and by
modifying the SystemC simulation kernel. On the other
hand, the second strategy is intended to be used in
further development steps, when the operating system is
introduced to validate the cosimulation between HW
modules and multitasking SW applications. In this
approach, the communication is implemented via
interrupts by using the features offered by the
operating system.\par
Experimental results are reported on two different case
studies to analyze and compare the effectiveness of
both the approaches.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Embedded Systems; HW/SW co-simulation; HW/SW
validation",
}
@Article{Inoue:2009:DSD,
author = "Hiroaki Inoue and Tsuyoshi Abe and Kazuhisa Ishizaka
and Junji Sakai and Masato Edahiro",
title = "Dynamic security domain scaling on embedded symmetric
multiprocessors",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "24:1--24:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497567",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We propose a method for dynamic security-domain
scaling on SMPs that offers both highly scalable
performance and high security for future high-end
embedded systems. Its most important feature is its
highly efficient use of processor resources,
accomplished by dynamically changing the number of
processors within a security-domain (i.e., dynamically
yielding processors to other security-domains) in
response to application load requirements. Two new
technologies make this scaling possible without any
virtualization software: (1) self-transition management
and (2) unified virtual address mapping. Evaluations
show that this domain control provides highly scalable
performance and incurs almost no performance overhead
in security-domains. The increase in OSs in binary code
size is less than 1.5\%, and the time required for
individual state transitions is on the order of a
single millisecond. This scaling is the first in the
world to make possible the dynamic changing of the
number of processors within a security-domain on an ARM
SMP.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "AMP; dynamic security-domain scaling; SMP",
}
@Article{Qiu:2009:CMW,
author = "Meikang Qiu and Edwin H.-M. Sha",
title = "Cost minimization while satisfying hard\slash soft
timing constraints for heterogeneous embedded systems",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "25:1--25:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497568",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In high-level synthesis for real-time embedded systems
using heterogeneous functional units (FUs), it is
critical to select the best FU type for each task.
However, some tasks may not have fixed execution times.
This article models each varied execution time as a
probabilistic random variable and solves {\em
heterogeneous assignment with probability\/} (HAP)
problem. The solution of the HAP problem assigns a
proper FU type to each task such that the total cost is
minimized while the timing constraint is satisfied with
a guaranteed confidence probability. The solutions to
the HAP problem are useful for both hard real-time and
soft real-time systems. Optimal algorithms are proposed
to find the optimal solutions for the HAP problem when
the input is a tree or a simple path. Two other
algorithms, one is optimal and the other is
near-optimal heuristic, are proposed to solve the
general problem. The experiments show that our
algorithms can effectively reduce the total cost while
satisfying timing constraints with guaranteed
confidence probabilities. For example, our algorithms
achieve an average reduction of 33.0\% on total cost
with 0.90 confidence probability satisfying timing
constraints compared with the previous work using
worst-case scenario.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Embedded Systems; heterogeneous; high-level synthesis;
real-time",
}
@Article{Zhou:2009:TAR,
author = "Xiangrong Zhou and Chenjie Yu and Peter Petrov",
title = "Temperature-aware register reallocation for register
file power-density minimization",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "26:1--26:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497569",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Increased chip temperature has been known to cause
severe reliability problems and to significantly
increase leakage power. The register file has been
previously shown to exhibit the highest temperature
compared to all other hardware components in a modern
high-end embedded processor, which makes it
particularly susceptible to faults and elevated leakage
power. We show that this is mostly due to the highly
clustered register file accesses where a set of few
registers physically placed close to each other are
accessed with very high frequency. We propose
compile-time temperature-aware register reallocation
methodologies for breaking such groups of registers and
to uniformly distribute the accesses to the register
file. This is achieved with {\em no performance\/} and
{\em no hardware overheads}. We show that the
underlying problem is NP-hard, and subsequently
introduce and evaluate two efficient algorithmic
heuristics. Our extensive experimental study
demonstrates the efficiency of the proposed
methodology.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hong:2009:RFD,
author = "Yu-Ru Hong and Juinn-Dar Huang",
title = "Reducing fault dictionary size for million-gate large
circuits",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "27:1--27:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497570",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In general, fault dictionary is prevented from
practical applications in fault diagnosis due to its
extremely large size. Several previous works are
proposed for the fault dictionary size reduction.
However, some of them fail to bring down the size to an
acceptable level, and others might not be able to
handle today's million-gate circuits due to their high
time and space complexity. In this article, an
algorithm is presented to reduce the size of pass-fail
dictionary while still preserving high diagnostic
resolution. The proposed algorithm possesses low time
and space complexity by avoiding constructing the huge
distinguishability table, which inevitably boosts up
the required computation complexity. Experimental
results demonstrate that the proposed algorithm is
capable of handling industrial million-gate large
circuits in a reasonable amount of runtime and
memory.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "diagnostic resolution; fault diagnosis; Fault
dictionary",
}
@Article{Kavousianos:2009:EPS,
author = "Xrysovalantis Kavousianos and Dimitris Bakalis and
Dimitris Nikolos",
title = "Efficient partial scan cell gating for low-power
scan-based testing",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "28:1--28:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497571",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Gating of the outputs of a portion of the scan cells
(partial gating) has been recently proposed as a method
for reducing the dynamic power dissipation during
scan-based testing. We present a new systematic method
for selecting, under area and performance design
constraints, the most suitable for gating subset of
scan cells as well as the proper gating value for each
one of them, aiming at the reduction of the average
switching activity during testing. We show that the
proposed method outperforms the corresponding already
known methods, with respect to average dynamic power
dissipation reduction.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Low-power testing; partial gating; scan cell gating;
scan-based testing",
}
@Article{Rakhmatov:2009:BVM,
author = "Daler Rakhmatov",
title = "Battery voltage modeling for portable systems",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "29:1--29:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497572",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Limited battery life imposes stringent constraints on
the operation of battery-powered portable systems.
During battery discharge, the battery voltage
decreases, until a certain cutoff value is reached,
marking the end of battery life. The amount of
discharge capacity and energy delivered by the battery
during its life depends not only on the battery
characteristics, but also on the load conditions. A
different system design may result in a different
battery current (load) profile over time, leading to a
different battery voltage profile over time. This
article presents an analytical model that relates the
battery voltage to the battery current, thus
facilitating system design optimizations with respect
to the battery performance. It captures well-known
nonlinear phenomena of capacity loss at high discharge
rates, charge recovery, and capacity fading. The
proposed model has been validated against measurements
taken on Li-ion batteries. We also describe techniques
for efficient calculations of model's estimates, which
lets a user exploit accuracy-complexity tradeoffs.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "accuracy-complexity tradeoff; analytical modeling;
battery performance; battery-powered systems; Low-power
design",
}
@Article{Kumar:2009:EML,
author = "Yokesh Kumar and Prosenjit Gupta",
title = "External memory layout vs.\ schematic",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "30:1--30:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497573",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The circuit represented by a VLSI layout must be
verified by checking it against the schematic circuit
as an important part of the functional verification
step. This involves two central problems of matching
the circuit graphs with each other (graph isomorphism)
and extracting a higher level of circuit from a given
level by finding subcircuits in the circuit graph
(subgraph isomorphism). Modern day VLSI layouts contain
millions of devices. Hence the memory requirements of
the data structures required by tools for verifying
them become huge and can easily exceed the amount of
internal memory available on a computer. In such a
scenario, a program not aware of the memory hierarchy
performs badly because of its unorganized input/output
operations (I/Os) as the speed of a disk access is
about a million times slower than accessing a main
memory location. In this article, we present
I/O-efficient algorithms for the graph isomorphism and
subgraph isomorphism problems in the context of
verification of VLSI layouts. Experimental results show
the need and utility of I/O-efficient algorithms for
handling problems with large memory requirements.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design automation; external memory algorithms; Graph;
subgraph isomorphism; verification of layouts",
}
@Article{Chen:2009:SAP,
author = "Po-Yuan Chen and Kuan-Hsien Ho and Tingting Hwang",
title = "Skew-aware polarity assignment in clock tree",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "31:1--31:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497574",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In modern sequential VLSI designs, clock tree plays an
important role in synchronizing different components in
a chip. To reduce peak current and power/ground noises
caused by clock network, assigning different signal
polarities to clock buffers is proposed in previous
work. Although peak current and power/ground noises are
minimized by signal polarities assignment, an
assignment without timing information may increase the
clock skew significantly. As a result, a timing-aware
signal polarities assigning technique is necessary. In
this article, we propose a novel signal polarities
assigning technique which can not only reduce peak
current and power/ground noises simultaneously but also
render the clock skew in control. The experimental
result shows that the clock skew produced by our
algorithm is 94\% of original clock skew in average
while the clock skews produced by three algorithms
(Partition, MST, Matching) in the absence of post clock
tuning steps in the previous work are 235\%, 272\%, and
283\%, respectively. Moreover, our algorithm is as
efficient as the three algorithms of the previous work
in reducing peak current and power/ground noises.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Clock skew; clock tree; peak current; polarity
assignment; power/ground noise",
}
@Article{Cho:2009:BHR,
author = "Minsik Cho and Katrina Lu and Kun Yuan and David Z.
Pan",
title = "{BoxRouter 2.0}: a hybrid and robust global router
with layer assignment for routability",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "32:1--32:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497575",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we present BoxRouter 2.0, and discuss
its architecture and implementation. As
high-performance VLSI design becomes more
interconnect-dominant, efficient congestion elimination
in global routing is in greater demand. Hence, we
propose a global router which has a strong ability to
improve routability and minimize the number of vias
with blockages, while minimizing wirelength. BoxRouter
2.0 is extended from BoxRouter 1.0, but can perform
multi-layer routing with 2D global routing and layer
assignment. Our 2D global routing is equipped with two
ideas: node shifting for congestion-aware Steiner tree
and robust negotiation-based A* search for routing
stability. After 2D global routing, 2D-to-3D mapping is
done by the layer assignment which is powered by
progressive via/blockage-aware integer linear
programming. Experimental results show that BoxRouter
2.0 has better routability with comparable wirelength
than other routers on ISPD07 benchmark, and it can
complete (no overflow) the widely used ISPD98 benchmark
for the first time in the literature with the shortest
wirelength. We further generate a set of harder ISPD98
benchmarks to push the limit of BoxRouter 2.0, and
propose the hardened ISPD98 benchmarks to map
state-of-the-art solutions for future routing
research.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "congestion; global routing; integer linear
programming; layer assignment; physical design;
routability; VLSI",
}
@Article{Gulati:2009:FBH,
author = "Kanupriya Gulati and Suganth Paul and Sunil P. Khatri
and Srinivas Patil and Abhijit Jas",
title = "{FPGA}-based hardware acceleration for {Boolean}
satisfiability",
journal = j-TODAES,
volume = "14",
number = "2",
pages = "33:1--33:??",
month = mar,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1497561.1497576",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Apr 2 15:06:01 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present an FPGA-based hardware solution to the
Boolean satisfiability (SAT) problem, with the main
goals of scalability and speedup. In our approach the
traversal of the implication graph as well as conflict
clause generation are performed in hardware, in
parallel. The experimental results and their analysis,
along with the performance models are discussed. We
show that an order of magnitude improvement in runtime
can be obtained over MiniSAT (the best-in-class
software based approach) by using a Virtex-4
(XC4VFX140) FPGA device. The resulting system can
handle instances with as many as 10K variables and 280K
clauses.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Boolean Constant Propagation (BCP); Boolean
satisfiability (SAT); conflict induced clauses; FPGA;
non-chronological backtrack",
}
@Article{Malik:2009:SCU,
author = "Avinash Malik and Zoran Salcic and Partha S. Roop",
title = "{SystemJ} compilation using the {Tandem Virtual
Machine} approach",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "34:1--34:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529256",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "SystemJ is a language based on the Globally
Asynchronous Locally Synchronous (GALS) paradigm. A
SystemJ program is a collection of GALS nodes, also
called clock domains, and each clock domain is a
synchronous program that extends the Java language.
Initial compilation of SystemJ has been to standard
Java executing on a Java Virtual Machine (JVM), which
is both inefficient and bulky for small embedded
systems. This article proposes a new approach for
compiling and executing SystemJ using a new type of
virtual machine, called a Tandem Virtual Machine (TVM).
The TVM approach provides an efficient implementation
of SystemJ on both standard processors and
resource-constrained embedded processors. The new
approach is based on separating the control-driven and
data-driven operations for execution on two virtual
machines. While the JVM executes the data-driven
operations, a Control Virtual Machine (CVM) is
introduced to execute the control-driven parts of a
SystemJ program. The TVM approach is capable of
handling all data-driven and control-driven operations
required by the GALS model. The benchmark results show
that the TVM has code size improvements of over 60\% on
average and also a substantial improvement in execution
speed over standard Java-based compilation.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "compilation; Esterel; System-level design; SystemJ;
virtual machines",
}
@Article{Cong:2009:SRB,
author = "Jason Cong and Yiping Fan and Junjuan Xu",
title = "Simultaneous resource binding and interconnection
optimization based on a distributed register-file
microarchitecture",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "35:1--35:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529257",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Behavior synthesis and optimization beyond the
register-transfer level require an efficient
utilization of the underlying platform features. This
article presents a platform-based resource binding
approach based on a {\em Distributed Register-File
Microarchitecture (DRFM)}, which makes efficient use of
distributed embedded memory blocks as register files in
modern FPGAs. DRFM contains multiple islands, each
having a local register file, a functional unit pool,
and data-routing logic. Compared to the traditional
discrete-register counterpart, a DRFM allows use of the
platform-featured on-chip memory or register-file IP
blocks to implement its local register files, and this
results in a substantial saving of multiplexing logic
and global interconnects. DRFM provides a useful
architectural template and a direct optimization
objective for minimizing interisland connections for
synthesis algorithms. Given the scheduling solution and
resource (functional units) constraints, two novel
algorithms in the resource binding stage are developed
based on DRFM: (i) a simultaneous DRFM clustering and
binding algorithm, which decides the configuration of
DRFM and the assignment of operations into islands with
the focus on optimizing global connections; (ii) a
data-forwarding scheduling algorithm, which takes
advantage of the operation slacks to handle the
read-port restriction of register files. On the Xilinx
Virtex4 FPGA platform, experimental results with a set
of real-life test cases show a 50\% logic area
reduction achieved by applying our approach, with a
14.6\% performance improvement, compared to the
traditional discrete-register-based approach. Also,
experiments on small-size designs show that our
algorithm produces the same number of total connections
and at most one more maximum feeding-in connection
compared to optimal solutions generated by ILP.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Behavioral synthesis; distributed register file;
resource binding",
}
@Article{Raghavan:2009:PTG,
author = "Praveen Raghavan and Murali Jayapala and Andy
Lambrechts and Javed Absar and Francky Catthoor",
title = "Playing the trade-off game: {Architecture} exploration
using {Coffeee}",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "36:1--36:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529258",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Modern mobile devices need to be extremely energy
efficient. Due to the growing complexity of these
devices, energy-aware design exploration has become
increasingly important. Current exploration tools often
do not support energy estimation, or require the design
to be very detailed before estimation is possible. It
is important to get early feedback on both performance
and energy consumption during all phases of the design
and at higher abstraction levels. This article presents
a unified optimization and exploration framework to
explore source-level transformation to processor
architecture design space. The proposed retargetable
compiler and simulator framework can map applications
to a range of processors and memory configurations,
simulate, and report detailed performance and energy
estimates. An accurate and consistent energy modeling
approach is introduced which can estimate the energy
consumption of processor and memories at a component
level, which can help to guide the design process. Fast
energy-aware architecture exploration is illustrated by
modeling both state-of-the-art processors as well as
other architectures. Various design trade-offs are also
illustrated on different academic as well as industrial
benchmarks from both the wireless communication and
multimedia domain. We also illustrate a design space
exploration on different applications and show that
there is large trade-off space between application
performance, energy consumption, and area. We show that
the proposed framework is consistent, accurate, and
covers a large design space including various novel
low-power extensions in a unified framework.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "architecture exploration; area; compiler-architecture
interaction; design; embedded systems; Energy; loop
transformations; power estimation; power-performance
trade-off; processors; VLIW",
}
@Article{Das:2009:SBT,
author = "Dipankar Das and P. P. Chakrabarti and Rajeev Kumar",
title = "Scenario-based timing verification of multiprocessor
embedded applications",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "37:1--37:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529259",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This work presents a static timing-analysis method for
verification of scenario-based real-time properties, on
graphical task-level models of embedded applications.
Scenario-based properties specify timing constraints
which must be honored for specific control-flow
behaviors and task execution orderings. Static checking
of scenario-based properties currently requires
computationally expensive model checking methods. Hence
the proposed graph-based static timing-analysis
algorithm improves upon the state-of-the-art. This is
manifested in a significant performance advantage over
timed model checking (up to 1000X in several cases),
which suffers from state space explosion. The proposed
algorithm also employs compositional reasoning and
abstraction refinement for handling large problems. We
also illustrate methods for using scenario-based timing
analysis, which can act as alternatives to traditional
timed model checking for verification of timed systems
like FDDI and Fischer protocols. We implement this
timing verification algorithm as a tool called {\em
SymTime\/} and present experimental results for SymTime
comparing it with SPIN, UPPAAL, and a TCTL model
checker for Time Petri Nets, called Romeo.",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "execution scenarios; real time systems; static timing
analysis; Timing verification",
}
@Article{Grosse:2009:MPO,
author = "Philippe Grosse and Yves Durand and Paul Feautrier",
title = "Methods for power optimization in {SOC}-based data
flow systems",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "38:1--38:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529260",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Whereas the computing power of DSP or general-purpose
processors was sufficient for 3G baseband
telecommunication algorithms, stringent timing
constraints of 4G wireless telecommunication systems
require computing-intensive data-driven architectures.
Managing the complexity of these systems within the
energy constraints of a mobile terminal is becoming a
major challenge for designers. System-level low-power
policies have been widely explored for generic
software-based systems, but data-flow architectures
used for high data-rate telecommunication systems
feature heterogeneous components that require specific
configurations for power management. In this study, we
propose an innovative power optimization scheme
tailored to self-synchronized data-flow systems. Our
technique, based on the synchronous data-flow modeling
approach, takes advantage of the latest low-power
techniques available for digital architectures. We
illustrate our optimization method on a complete 4G
telecommunication baseband modem and show the energy
savings expected by this technique considering present
and future silicon technologies.",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "4G base-band modem; data-driven SOC; Power
optimization; synchronous data-flow graph",
}
@Article{Clarke:2009:WLS,
author = "Jonathan A. Clarke and George A. Constantinides and
Peter Y. K. Cheung",
title = "Word-length selection for power minimization via
nonlinear optimization",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "39:1--39:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529261",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article describes the first method for minimizing
the dynamic power consumption of a Digital Signal
Processing (DSP) algorithm implemented on
reconfigurable hardware via word-length optimization.
Fast models for estimating the power consumption of the
arithmetic components and the routing power of these
algorithm implementations are used within a constrained
nonlinear optimization formulation that solves a
relaxed version of word-length optimization. Tight
lower and upper bounds on the cost of the integer
word-length problem can be obtained using the proposed
solution, with typical upper bounds being 2.9\% and
5.1\% larger than the lower bounds for area and power
consumption, respectively. Heuristics can then use the
upper bound as a starting point from which to get even
closer to the known lower bound. Results show that
power consumption can be improved by up to 40\%
compared to that achieved when using simple word-length
selection techniques, and further comparisons are made
between the minimization of different cost functions
that give insight into the advantages offered by
multiple word-length optimization.",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "bitwidth; Power consumption; signal processing;
synthesis; word length",
}
@Article{Morgado:2009:GRS,
author = "P. Marques Morgado and Paulo F. Flores and L. Miguel
Silveira",
title = "Generating realistic stimuli for accurate power grid
analysis",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "40:1--40:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529262",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power analysis tools are an integral component of any
current power sign-off methodology. The performance of
a design's power grid affects the timing and
functionality of a circuit, directly impacting the
overall performance. Ensuring power grid robustness
implies taking into account, among others, static and
dynamic effects of voltage drop, ground bounce, and
electromigration. This type of verification is usually
done by simulation, targeting a worst-case scenario
where devices, switching almost simultaneously, could
impose stern current demands on the power grid. While
determination of the exact worst-case switching
conditions from the grid perspective is usually not
practical, the choice of simulation stimuli has a
critical effect on the results of the analysis.
Targeting safe but unrealistic settings could lead to
pessimistic results and costly overdesigns in terms of
die area. In this article we describe a software tool
that generates a reasonable, realistic, set of stimuli
for simulation. The approach proposed accounts for
timing and spatial restrictions that arise from the
circuit's netlist and placement and generates an
approximation to the worst-case condition. The
resulting stimuli indicate that only a fraction of the
gates change in any given timing window, leading to a
more robust verification methodology, especially in the
dynamic case. Generating such stimuli is akin to
performing a standard static timing analysis, so the
tool fits well within conventional design frameworks.
Furthermore, the tool can be used for hotspot detection
in early design stages.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "ground bounce; Power grid; simulation; stimuli
generation; verification; voltage drop",
}
@Article{Yu:2009:APG,
author = "Hao Yu and Joanna Ho and Lei He",
title = "Allocating power ground vias in {$3$D} {ICs} for
simultaneous power and thermal integrity",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "41:1--41:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529263",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The existing work on via allocation in 3D ICs ignores
power/ground vias' ability to simultaneously reduce
voltage bounce and remove heat. This article develops
the first in-depth study on the allocation of
power/ground vias in 3D ICs with simultaneous
consideration of power and thermal integrity. By
identifying principal ports and parameters, effective
electrical and thermal macromodels are employed to
provide dynamic power and thermal integrity as well as
sensitivity with respect to via density. With the use
of sensitivity, an efficient via allocation
simultaneously driven by power and thermal integrity is
developed. Experiments show that, compared to
sequential power and thermal optimization using static
integrity, sequential optimization using the dynamic
integrity reduces nonsignal vias by up to 18\%, and
simultaneous optimization using dynamic integrity
further reduces nonsignal vias by up to 45.5\%.",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "macromodeling; parametric 3D-IC design; Thermal and
power integrity",
}
@Article{Liu:2009:MAA,
author = "Bo Liu and Francisco V. Fern{\'a}ndez and Georges
Gielen and R. Castro-L{\'o}pez and E. Roca",
title = "A memetic approach to the automatic design of
high-performance analog integrated circuits",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "42:1--42:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529264",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article introduces an evolution-based
methodology, named memetic single-objective
evolutionary algorithm (MSOEA), for automated sizing of
high-performance analog integrated circuits. Memetic
algorithms may achieve higher global and local search
ability by properly combining operators from different
standard evolutionary algorithms. By integrating
operators from the differential evolution algorithm,
from the real-coded genetic algorithm, operators
inspired by the simulated annealing algorithm, and a
set of constraint handling techniques, MSOEA
specializes in handling analog circuit design problems
with numerous and tight design constraints. The method
has been tested through the sizing of several analog
circuits. The results show that design specifications
are met and objective functions are highly optimized.
Comparisons with available methods like genetic
algorithm and differential evolution in conjunction
with static penalty functions, as well as with
intelligent selection-based differential evolution, are
also carried out, showing that the proposed algorithm
has important advantages in terms of constraint
handling ability and optimization quality.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Analog circuit sizing; analog design automation;
constrained optimization; memetic algorithm",
}
@Article{Mutyam:2009:SST,
author = "Madhu Mutyam",
title = "Selective shielding technique to eliminate crosstalk
transitions",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "43:1--43:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529265",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With CMOS process technology scaling to deep submicron
level, propagation delay across long on-chip buses is
becoming one of the main performance limiting factors
in high-performance designs. Propagation delay is very
significant when adjacent wires are transitioning in
opposite direction as compared to transitioning in the
same direction. As opposite transitions on adjacent
wires (called {\em crosstalk transitions\/}) have
significant impact on propagation delay, several bus
encoding techniques have been proposed in literature to
eliminate such transitions.\par
We propose a {\em selective shielding\/} technique to
eliminate crosstalk transitions. We show that the
selective shielding technique requires $ \lceil 3 n / 2
\rceil $ wires to encode an $n$-bit bus. SPICE
simulations by considering 90nm technology nodes reveal
that, for uniformly distributed random data, our
technique achieves nearly 39\% (21\%) delay savings
over 10 {\em mm\/}-length uncoded 32-bit bus for
pipelined (nonpipelined) data transmission at the cost
of nearly 7\% energy overhead.",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "bus encoding; Crosstalk; power consumption; switching
activity",
}
@Article{Taskin:2009:CTR,
author = "Baris Taskin and Joseph Demaio and Owen Farell and
Michael Hazeltine and Ryan Ketner",
title = "Custom topology rotary clock router with tree
subnetworks",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "44:1--44:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529266",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Increasing demands on computing power have spurred the
development of faster, higher-density Integrated
Circuits (ICs), compounding power and complexity
concerns in design budgets. The clock distribution
network is a significant contributor to such power and
complexity concerns. Resonant rotary clocking is a
relatively new technology that realizes several
benefits over current clocking methods, including
power, frequency, and variation tolerance, yet lacks
the automation tools to promote increased use. Towards
this end, an automated rotary clock routing methodology
is presented that generates custom topology rotary ring
routes with tree subnetworks. In addition to the
benefits of adiabatic clocking, the presented custom
topology router permits 38.6\% shorter wirelengths on
average for register tapping, compared to traditional
prescribed skew, binary tree routing.",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "clock network design; clock skew; multiphase
synchronization; Resonant rotary clocking",
}
@Article{Liu:2009:HPO,
author = "Chih-Hung Liu and Shih-Yi Yuan and Sy-Yen Kuo and
Szu-Chi Wang",
title = "High-performance obstacle-avoiding rectilinear
{Steiner} tree construction",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "45:1--45:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529267",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Rectilinear Steiner trees have been used to route signal
nets by global and detail routers in VLSI design for a
long time. However, in current IC industry, there are
significantly increasing obstacles to be considered,
such as large-scale power networks, pre-routed nets, IP
blocks, and antenna jumpers. Accordingly, the {\em
obstacle-avoiding rectilinear Steiner minimal tree\/}
(OARSMT) problem has become more important. In this
article, we propose a new routing graph, {\em
obstacle-avoiding routing graph\/} (OARG), for the
OARSMT problem. Due to the important properties of
OARG, we construct a 3-step algorithm and a local
refinement scheme, which both can take advantage of
these properties, to find a suboptimal solution
efficiently. Furthermore, each step of our 3-step
algorithm as well as the local refinement scheme has
theoretical or practical benefits. Therefore, each of
them can be applicable to other existing works for
general or specific considerations such as efficiency
or effectiveness. Extensive experimental results show
that our method outperforms all existing works in terms
of wirelength and achieves the best speed
performance.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "obstacle-avoiding; rectilinear; Routing; Steiner
tree",
}
@Article{Yan:2009:TAS,
author = "Tan Yan and Martin D. F. Wong",
title = "Theories and algorithms on single-detour routing for
untangling twisted bus",
journal = j-TODAES,
volume = "14",
number = "3",
pages = "46:1--46:??",
month = may,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1529255.1529268",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Jun 3 16:12:53 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Previous works on PCB bus routing assume matched pin
ordering on both sides. But in practice, the pin
ordering might be mismatched and the nets become
twisted. In this article, we propose a preprocessing
step to untangle such twisted nets. We also introduce a
practical routing style, which we call {\em
single-detour routing}, to simplify the untangling
problem. We then present a necessary and sufficient
condition for the existence of single-detour routing
solutions. Furthermore, we present a
dynamic-programming-based algorithm to solve the
single-detour untangling problem with consideration of
wire capacity between adjacent pins. Our algorithm
produces an optimal single-detour routing solution that
rematches the pin ordering. By integrating our
algorithm into the bus router in a previous
length-matching router, we show that many routing
problems that cannot be solved previously can now be
solved with insignificant increase in runtime.",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Bus routing; dynamic programming; printed circuit
board (PCB); single-detour routing; twisted bus",
}
@Article{Gopalakrishnan:2009:ATB,
author = "Sivaram Gopalakrishnan and Priyank Kalla",
title = "{2009 ACM TODAES} best paper award: {Optimization} of
polynomial datapaths using finite ring algebra",
journal = j-TODAES,
volume = "14",
number = "4",
pages = "47:1--47:??",
month = aug,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1562514.1562515",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 27 14:38:55 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bertels:2009:EMM,
author = "Peter Bertels and Wim Heirman and Erik D'Hollander and
Dirk Stroobandt",
title = "Efficient memory management for hardware accelerated
{Java Virtual Machines}",
journal = j-TODAES,
volume = "14",
number = "4",
pages = "48:1--48:??",
month = aug,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1562514.1562516",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 27 14:38:55 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Application-specific hardware accelerators can
significantly improve a system's performance. In a
Java-based system, we then have to consider a hybrid
architecture that consists of a Java Virtual Machine
running on a general-purpose processor connected to the
hardware accelerator. In such a hybrid architecture,
data communication between the accelerator and the
general-purpose processor can incur a significant cost,
which may even annihilate the original performance
improvement of adding the accelerator. A careful layout
of the data in the memory structure is therefore of
major importance to maintain the acceleration
performance benefits.\par
This article addresses the reduction of the
communication cost in a distributed shared memory
consisting of the main memory of the processor and the
accelerator's local memory, which are unified in the
Java heap. Since memory access times are highly
nonuniform, a suitable allocation of objects in either
main memory or the accelerator's local memory can
significantly reduce the communication cost. We propose
several techniques for finding the optimal location for
each Java object's data, either statically through
profiling or dynamically at runtime. We show how we can
reduce communication cost by up to 86\% for the SPECjvm
and DaCapo benchmarks. We also show that the best
strategy is application dependent and also depends on
the relative cost of remote versus local accesses. For
a relative cost higher than 10, a self-learning dynamic
approach often results in the best performance.",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Dynamic memory management; hardware acceleration; Java
Virtual Machine",
}
@Article{Faezipour:2009:HPE,
author = "Miad Faezipour and Mehrdad Nourani and Rina
Panigrahy",
title = "A hardware platform for efficient worm outbreak
detection",
journal = j-TODAES,
volume = "14",
number = "4",
pages = "49:1--49:??",
month = aug,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1562514.1562517",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 27 14:38:55 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Network Intrusion Detection Systems (NIDS) monitor
network traffic to detect attacks or unauthorized
activities. Traditional NIDSes search for patterns that
match typical network compromise or remote hacking
attempts. However, newer networking applications
require finding the frequently repeated strings in a
packet stream for further investigation of potential
attack attempts. Finding frequently repeated strings
within a given time frame of the packet stream has been
quite efficient to detect polymorphic worm outbreaks. A
novel real-time worm outbreak detection system using
two-phase hashing and monitoring repeated common
substrings is proposed in this article. We use the
concept of shared counters to minimize the memory cost
while efficiently sifting through suspicious strings.
The worm outbreak system has been prototyped on Altera
Stratix FPGA. We have tested the system for various
settings and packet stream sizes. Experimental results
verify that our system can support line speed of
gigabit-rates with negligible false positive and
negative rates.",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "false negative; false positive; hashing; Network
Intrusion Detection System; polymorphic worm; shared
counters; worm outbreak",
}
@Article{Lee:2009:TSA,
author = "Byunghyun Lee and Ki-Seok Chung and Bontae Koo and
Nak-Woong Eum and Taewhan Kim",
title = "Thermal sensor allocation and placement for
reconfigurable systems",
journal = j-TODAES,
volume = "14",
number = "4",
pages = "50:1--50:??",
month = aug,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1562514.1562518",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 27 14:38:55 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A dynamic monitoring of thermal behavior of hardware
resources using thermal sensors is very important to
maintain the operation of systems safe and reliable.
This article addresses the problem of thermal sensor
allocation and placement for reconfigurable systems.
For programmable logic arrays, the degree of the use of
hardware resources in the systems highly depends on the
target application to be implemented, making the
allocation of thermal sensors at the manufacturing
stage inadequate (or too costly if implemented) due to
the unpredictable thermal profile. This means that the
thermal sensor allocation could be processed at the
time when the reconfigurable logic is implemented
(i.e., at the post-manufacturing stage). This work
proposes an effective solution to the problem of
thermal sensor allocation and placement at the
post-manufacturing stage. Specifically, we define the
Sensor Allocation and Placement Problem (SAPP), and
propose a solution which formulates SAPP into the
Unate-Covering Problem (UCP) and solves it optimally.
Also we combine SAPP with temperature correlation to
reduce required sensors more aggressively and propose a
solution by applying UCP again. We then provide an
extended solution to handle a practical design issue
where the hardware resources for the sensor
implementation on specific array locations have already
been used up by the application logic. Experimental
results using MCNC benchmarks show that our proposed
technique uses 62.4\% and 19.7\% fewer sensors
to monitor hotspots on the average than that used by
the grid-based and the bisection-based approaches while
the overhead of auxiliary circuitry is minimized,
respectively.",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "optimal placement; reconfigurable system; Thermal
sensor; unate-covering problem",
}
@Article{Yuh:2009:TTB,
author = "Ping-Hung Yuh and Chia-Lin Yang and Yao-Wen Chang",
title = "{T}-trees: a tree-based representation for temporal
and three-dimensional floorplanning",
journal = j-TODAES,
volume = "14",
number = "4",
pages = "51:1--51:??",
month = aug,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1562514.1562519",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 27 14:38:55 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Improving logic capacity by time-sharing, dynamically
reconfigurable FPGAs are employed to handle designs of
high complexity and functionality. In this article, we
model each task as a 3D-box and deal with the temporal
floorplanning/placement problem for dynamically
reconfigurable FPGA architectures. We present a
tree-based data structure, called {\em T-trees}, to
represent the spatial and temporal relations among
tasks. Each node in a T-tree has at most three children
which represent the dimensional relationship among
tasks. For the T-tree, we develop an efficient packing
method and derive the condition to ensure the
satisfaction of precedence constraints which model the
temporal ordering among tasks induced by the execution
of dynamically reconfigurable FPGAs. Experimental
results show that our tree-based formulation can obtain
significantly better solution quality with less
execution time than the most recent state-of-the-art
work.",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "partially dynamical reconfiguration; Reconfigurable
computing; temporal floorplanning",
}
@Article{Yuh:2009:LAT,
author = "Ping-Hung Yuh and Chia-Lin Yang and Chi-Feng Li and
Chung-Hsiang Lin",
title = "Leakage-aware task scheduling for partially
dynamically reconfigurable {FPGAs}",
journal = j-TODAES,
volume = "14",
number = "4",
pages = "52:1--52:??",
month = aug,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1562514.1562520",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 27 14:38:55 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As technology continues to shrink, reducing leakage
power of Field-Programmable Gate Arrays (FPGAs) becomes
a critical issue for the practical use of FPGAs. In
this article, we address the leakage issue of partially
dynamically reconfigurable FPGA architectures with
sleep transistors embedded into FPGA fabrics. In
particular, we focus on eliminating leakage waste due
to the delay between reconfiguration and execution time
of a task. For partially dynamically reconfigurable
FPGAs, the configuration prefetching technique is
commonly used to hide runtime reconfiguration overhead.
With prefetching, the configuration of a task is loaded
into FPGAs as early as possible. Therefore, there is
often a delay between reconfiguration and execution
time of a task. In this period of time, the SRAM cells
allocated to a task cannot be turned off even though
they are not utilized.\par
In this article, we propose a two-stage task scheduling
methodology to reduce leakage waste due to the delay
between reconfiguration and execution time of a task
without sacrificing performance. In the first stage, a
performance-driven task scheduler that targets at
minimizing the schedule length is invoked to generate
an initial placement. In the second stage, a
postplacement leakage-aware task scheduling is applied
to refine the initial placement such that leakage waste
is minimized provided that the schedule length is not
increased. To solve the postplacement leakage
optimization problem, we propose two algorithms. The
first one is an optimal algorithm based on Integer
Linear Programming (ILP). The second algorithm is a
heuristic approach that iteratively refines the
placement to reduce leakage waste. Experimental results
on real and synthetic designs show the efficiency
and effectiveness of the proposed postplacement leakage
reduction techniques.",
acknowledgement = ack-nhfb,
articleno = "52",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "leakage; partially dynamical reconfiguration;
placement; Reconfigurable computing; scheduling",
}
@Article{Chen:2009:LRD,
author = "Po-Yuan Chen and Chiao-Chen Fang and Tingting Hwang
and Hsi-Pin Ma",
title = "Leakage reduction, delay compensation using
partition-based tunable body-biasing techniques",
journal = j-TODAES,
volume = "14",
number = "4",
pages = "53:1--53:??",
month = aug,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1562514.1562521",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 27 14:38:55 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In recent years, fabrication technology of CMOS has
scaled to nanometer dimensions. As scaling progresses,
several new challenges follow. Among them, the most
noticeable two are process variations and leakage
current of the circuit. To tackle the problems of
process variations and leakage current, an effective
way is to use a body-biasing technique. In substance,
using the RBB technique can minimize leakage current
but increase the delay of a gate. Contrary to RBB, the
FBB technique decreases the delay but increases leakage
current of a gate. In the previous work, a single
body-biasing is applied to the whole circuit. In a slow
circuit, since the FBB is applied to the whole circuit,
the leakage current of all gates in the circuit
increases dramatically. On the other hand, in a fast
circuit, RBB is applied to decrease the leakage
current. However, without violating the timing
specification, the value of body-biasing is restricted
by the critical paths, and the saving of leakage
current is limited. In this article, we propose a
design flow to partition the circuit into subcircuits
so that each subcircuit can be applied its individual
RBB or FBB. Experiments show that our method is able to
save leakage current from 42\% to 47\% as compared to
designs not using a body-biasing technique. Under
process variations, our method can save 42\% to 49\%
leakage on fast circuits and 20\% to 35\% on slow
circuits.",
acknowledgement = ack-nhfb,
articleno = "53",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Body biasing; leakage current; low-power design;
process variations",
}
@Article{Ranganathan:2009:VAM,
author = "Nagarajan Ranganathan and Upavan Gupta and
Venkataraman Mahalingam",
title = "Variation-aware multimetric optimization during gate
sizing",
journal = j-TODAES,
volume = "14",
number = "4",
pages = "54:1--54:??",
month = aug,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1562514.1562522",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Aug 27 14:38:55 MDT 2009",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The aggressive scaling of technology has not only
accentuated the effects of intradie parametric
variations in devices, but it has also impacted the
effects of optimizing a certain performance metric on
the optimality of other metrics. Thus, there is a need
for optimization methods that can perform the
simultaneous optimization of multiple metrics
considering the effects of process variations. In this
article, a novel variation-aware gate sizing framework
has been developed that can perform simultaneous
optimization of multiple performance metrics. In this
framework, the relationships between the optimization
metrics (like dynamic power, leakage power, and
crosstalk noise) are modeled as a function of the gate
sizes in the objective function. The delay values
obtained from unconstrained delay optimization and the
noise margins derived from coupling capacitance
information form the constraints for the multimetric
optimization problem. As an abstract framework, it is
independent of the type of mathematical programming
approach as well as the metrics chosen to be optimized.
The framework has been implemented using a mathematical
programming approach and has been tested on ITC'99
benchmarks for different combinations of multimetric
and single-metric optimizations of delay, dynamic
power, leakage power, and crosstalk noise. The results
indicate that the framework identifies good solution
points, and is efficient for postlayout optimization
via gate sizing.",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "crosstalk noise; delay; Gate sizing; mathematical
programming; optimization; power",
}
@article{Moiseev:2009:PDO,
  author = {Konstantin Moiseev and Avinoam Kolodny and Shmuel
    Wimer},
  title = {Power-delay optimization in {VLSI} microprocessors by
    wire spacing},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {55:1--55:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1562523},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The problem of optimal space allocation among
    interconnect wires in a VLSI layout, in order to
    minimize the switching power consumption and the
    average signal delay, is addressed in this article. We
    define a Weighted Power-Delay Sum (WPDS) objective
    function and derive necessary and sufficient conditions
    for the existence of optimal interwire space
    allocation, based on the notion of capacitance density.
    At the optimum, every wire must be in equilibrium of
    its line-to-line weighted capacitance density on its
    two opposite sides, and the WPDS of the whole circuit
    is minimal if and only if capacitance density is
    uniformly distributed across the entire layout. This
    condition is shown to be equivalent to all paths of the
    layout cross-capacitance graph having the same length
    and all cuts having the same flow. An implementation
    which has been used in the design of a recent
    commercial high-end microprocessor and yielded 17\%
    power reduction and 9\% delay reduction in top-level
    interconnects is presented.},
  acknowledgement = ack-nhfb,
  articleno = {55},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {delay-optimization; interconnect optimization; power
    optimization; Wire spacing},
}
@article{Engelke:2009:SSU,
  author = {Piet Engelke and Bernd Becker and Michel Renovell and
    Juergen Schloeffel and Bettina Braitling and Ilia
    Polian},
  title = {{SUPERB}: {Simulator Utilizing Parallel Evaluation of
    Resistive Bridges}},
  journal = j-TODAES,
  volume = {14},
  number = {4},
  pages = {56:1--56:??},
  month = aug,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1562514.1596831},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Thu Aug 27 14:38:55 MDT 2009},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {A high-performance resistive bridging fault simulator
    SUPERB (Simulator Utilizing Parallel Evaluation of
    Resistive Bridges) is proposed. It is based on fault
    sectioning in combination with parallel-pattern or
    parallel-fault multiple-stuck-at simulation. It
    outperforms a conventional interval-based resistive
    bridging fault simulator by three orders of magnitude
    while delivering identical results. Further competing
    tools are outperformed by several orders of magnitude.
    Industrial-size circuits, including a
    multi-million-gates design, could be simulated with
    runtimes within an order of magnitude of the runtimes
    for pattern-parallel stuck-at fault simulation.},
  acknowledgement = ack-nhfb,
  articleno = {56},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {bridging fault simulation; fault mapping; PPSFP;
    Resistive bridging faults; SPPFP},
}
@Article{Chang:2009:DIE,
author = "Li-Pin Chang and Chun-Da Du",
title = "Design and implementation of an efficient
wear-leveling algorithm for solid-state-disk
microcontrollers",
journal = j-TODAES,
volume = "15",
number = "1",
pages = "6:1--6:??",
month = dec,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1640457.1640463",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:18:31 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Solid-state disks (SSDs) are storage devices that
emulate hard drives with flash memory. They have been
widely deployed in mobile computers as disk drive
replacements. Flash memory is organized in terms of
erase blocks. With the current technology, a block can
reach the end of its lifetime after thousands of
erasure operations. Wear leveling is a technique to
evenly erase the entire flash memory so that all blocks
remain alive as long as possible. This study introduces
a new wear-leveling algorithm based on the observation
that, under a real-life mobile PC's workload, most
erasure operations are contributed by a small fraction
of blocks. Our key ideas are (1) moving rarely updated
data to a block that is extraordinarily worn and (2)
avoiding repeatedly involving a block in wear-leveling
activities. This study presents a successful
implementation of the proposed wear-leveling algorithm
using about 200 bytes of RAM in an SSD controller rated
at 33 MHz. Evaluation results show that this algorithm
achieves even wear of the entire flash memory while
reducing the overheads of extra flash-memory
operations.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "embedded systems; flash memory; solid-state disks;
Wear leveling",
}
@article{Geelen:2009:SLE,
  author = {Bert Geelen and Vissarion Ferentinos and Francky
    Catthoor and Gauthier Lafruit and Diederik Verkest and
    Rudy Lauwereins and Thanos Stouraitis},
  title = {Spatial locality exploitation for runtime reordering
    of {JPEG2000} wavelet data layouts},
  journal = j-TODAES,
  volume = {15},
  number = {1},
  pages = {8:1--8:??},
  month = dec,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1640457.1640465},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:18:31 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Exploitation of spatial locality is essential for
    memories to increase the access bandwidth and to reduce
    the access-related latency and energy per word. Spatial
    locality exploitation of a kernel can be improved by
    modifying placement of data in memory, but this may be
    felt not only by the kernel itself, but also in other
    application components accessing the same data. Thus
    care is needed to avoid global miss-rate improvements
    are thwarted by miss-rate increases in other
    application components. This article examines
    application-level miss-rate increases due to handling
    modified Wavelet Transform data layouts by explicitly
    reordering at runtime, exploiting the execution order
    freedom within a reordering buffer when the layout of
    surrounding components is known. For the JPEG2000
    application, taking into account the reordering costs
    still results in 80\% net WT miss-rate gains.},
  acknowledgement = ack-nhfb,
  articleno = {8},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Layout transformations; spatial locality; wavelet
    transform},
}
@article{Keutzer:2009:ATD,
  author = {Kurt Keutzer and Peng Li and Li Shang and Hai Zhou},
  title = {{ACM Transactions on Design Automation of Electronic
    Systems (TODAES)} special section call for papers:
    {Parallel CAD}: Algorithm design and programming},
  journal = j-TODAES,
  volume = {15},
  number = {1},
  pages = {9:1--9:??},
  month = dec,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1640457.1640466},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:18:31 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno = {9},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Kim:2009:MLP,
author = "Jaehyun Kim and Chungki Oh and Youngsoo Shin",
title = "Minimizing leakage power of sequential circuits
through mixed-{$ V_t $} flip-flops and multi-{$ V_t $}
combinational gates",
journal = j-TODAES,
volume = "15",
number = "1",
pages = "4:1--4:??",
month = dec,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1640457.1640461",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:18:31 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The current use of multi-$ V_t $ to control leakage
power targets combinational gates, even though
sequential elements such as flip-flops and latches also
contribute appreciable leakage. We can, nevertheless,
apply multi-$ V_t $ to flip-flops, but few can take
advantage of high-$ V_t $, which causes abrupt changes
in timing. We combine low- and high-$ V_t $ at the
transistor level to design mixed-$ V_t $ flip-flops
with reduced leakage, an unchanged footprint, and a
small increase in either setup time or clock-to-Q
delay, but not both. An allocation algorithm for two $
V_t $'s determines the $ V_t $ (mixed, high, or low) of
each flip-flop and the $ V_t $ of each combinational
gate (high or low) in a sequential circuit. Experiments
with 65-nm technology show an average leakage saving of
42\% compared to conventional multi-$ V_t $ approaches;
the leakage of flip-flops alone is cut by 78\%. This
saving is largely unaffected by die-to-die or
within-die process variations, which we show through
simulations. Standard deviation of leakage caused by
process variation is also reduced due to less use of
low-$ V_t $ devices. We also extend our approach to
three $ V_t $'s, and obtain a further 14\% reduction in
leakage.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Flip-flop; leakage current; low power; mixed-$ V_t $;
sequential circuit",
}
@Article{Mu:2009:AHS,
author = "Jingqing Mu and Roman Lysecky",
title = "Autonomous hardware\slash software partitioning and
voltage\slash frequency scaling for low-power embedded
systems",
journal = j-TODAES,
volume = "15",
number = "1",
pages = "2:1--2:??",
month = dec,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1640457.1640459",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:18:31 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Warp processing is a recent computing technology
capable of autonomously partitioning the critical
kernels within an executing software application to
hardware circuits implemented within an on-chip FPGA.
While previous performance-driven warp processing has
been shown to provide significant performance
improvements over software only execution, the dynamic
performance improvement of warp processors may be lost
for certain application domains, such as real-time
systems. Alternatively, as power consumption continues
to become a dominant design constraint, we present and
thoroughly analyze a low-power warp processing
methodology that leverages voltage and/or frequency
scaling to substantially reduce power consumption
without any performance degradation --- all without
requiring designer effort beyond the initial software
development.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "dynamically adaptable systems; hardware/software
partitioning; low-power; low-power FPGAs;
reconfigurable computing; Warp processing",
}
@article{Pomeranz:2009:UST,
  author = {Irith Pomeranz and Sudhakar M. Reddy},
  title = {Using stuck-at tests to form scan-based tests for
    transition faults in standard-scan circuits},
  journal = j-TODAES,
  volume = {15},
  number = {1},
  pages = {7:1--7:??},
  month = dec,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1640457.1640464},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:18:31 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In enhanced-scan circuits, a two-pattern test for a
    transition fault can be obtained by using a test {\em
    t$_j$ \/} that detects a stuck-at fault, and preceding
    it by a test {\em t$_i$ \/} that activates another
    stuck-at fault. Thus, test generation for transition
    faults can be done by combining pairs of stuck-at
    tests. This provides an alternative to deterministic
    test generation, as well as reduces the test storage
    requirements for transition fault tests. We study the
    possibility of generating scan-based tests for
    transition faults in standard-scan circuits in a
    similar way, by combining pairs of stuck-at tests.
    Since it is not always possible to obtain a
    standard-scan test that is equivalent to a two-pattern
    test based on stuck-at tests {\em t$_i$ \/} and {\em
    t$_j$}, it is not always possible to guarantee that the
    combination of {\em t$_i$ \/} and {\em t$_j$ \/} will
    detect a transition fault. To compensate for this, it
    is necessary to try combinations of different stuck-at
    test pairs, resulting in an increased simulation effort
    to compute effective standard-scan tests. Our focus in
    this work is on reducing this simulation effort by
    reducing the number of stuck-at test pairs that need to
    be considered.},
  acknowledgement = ack-nhfb,
  articleno = {7},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {Broadside tests; scan circuits; skewed-load tests;
    stuck-at faults; transition faults},
}
@Article{Rao:2009:COT,
author = "Rajeev R. Rao and Vivek Joshi and David Blaauw and
Dennis Sylvester",
title = "Circuit optimization techniques to mitigate the
effects of soft errors in combinational logic",
journal = j-TODAES,
volume = "15",
number = "1",
pages = "5:1--5:??",
month = dec,
year = "2009",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1640457.1640462",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:18:31 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Soft errors in combinational logic circuits are
emerging as a significant reliability problem for VLSI
designs. Technology scaling trends indicate that the
soft error rates (SER) of logic circuits will be
a dominant factor for future technology generations. SER
mitigation in logic can be accomplished by optimizing
either the gates inside a logic block or the flipflops
present on the block boundaries. We present novel
circuit optimization techniques that target these
elements separately as well as in unison to reduce the
SER of combinational logic circuits.\par
First, we describe the construction of a new class of
flip-flop variants that leverage the effect of temporal
masking by selectively increasing the length of the
latching window thereby preventing faulty transients
from being registered. In contrast to previous
flip-flop designs that rely on logic duplication and
complicated circuit design styles, the new variants are
redesigned from the library flip-flop using efficient
transistor sizing. We then propose a flip-flop
selection method that uses slack information at each
primary output node to determine the flip-flop
configuration that produces maximum SER savings. Next,
we propose a gate sizing algorithm that trades off SER
reduction and area overhead. This approach first
computes bounds on the maximum achievable SER reduction
by resizing a gate. This bound is then used to prune
the circuit graph, arriving at a smaller set of
candidate gates on which we perform incremental
sensitivity computations to determine the gates that
are the largest contributors to circuit SER. Third, we
propose a unified, co-optimization approach combining
flip-flop selection with the gate sizing algorithm. The
joint optimization algorithm produces larger SER
reductions while incurring smaller circuit overhead
than either technique taken in isolation. Experimental
results on a variety of benchmarks show average SER
reductions of 10.7X with gate sizing, 5.7X with
flip-flop assignment, and 30.1X for the combined
optimization approach, with no delay penalties and area
overheads within 5--6\%. The runtimes for the
optimization algorithms are on the order of 1--3
minutes.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "circuit optimization; combinational logic; sequential
circuits; Soft errors",
}
@article{Wolinski:2009:ADA,
  author = {Christophe Wolinski and Krzysztof Kuchcinski and Erwan
    Raffin},
  title = {Automatic design of application-specific
    reconfigurable processor extensions with {UPaK}
    synthesis kernel},
  journal = j-TODAES,
  volume = {15},
  number = {1},
  pages = {1:1--1:??},
  month = dec,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1640457.1640458},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:18:31 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article presents a new tool for automatic design
    of application-specific reconfigurable processor
    extensions based on UPaK (Abstract Unified Patterns
    Based Synthesis Kernel for Hardware and Software
    Systems). We introduce a complete design flow that
    identifies new instructions, selects specific
    instructions and schedules a considered application on
    the newly created reconfigurable architecture. The
    identified extensions are implemented as specialized
    sequential or parallel instructions. These instructions
    are executed on a reconfigurable unit implementing all
    merged patterns. Our method uses specially developed
    algorithms for subgraph isomorphism that are
    implemented as graph matching constraints. These
    constraints together with separate algorithms are able
    to efficiently identify computational patterns and
    carry out application mapping and scheduling. Our
    methods can handle both time-constrained and
    resource-constrained scheduling. Experimental results
    show that the presented method provides high coverage
    of application graphs with small number of patterns and
    ensures high application execution speedup both for
    sequential and parallel application execution with
    reconfigurable processor extensions implementing
    selected patterns.},
  acknowledgement = ack-nhfb,
  articleno = {1},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {constraint programming; Reconfigurable architectures;
    resource assignment; scheduling; system-level
    synthesis},
}
@article{Wu:2009:PCV,
  author = {Meng-Chen Wu and Ming-Ching Lu and Hung-Ming Chen and
    Jing-Yang Jou},
  title = {Performance-constrained voltage assignment in multiple
    supply voltage {SoC} floorplanning},
  journal = j-TODAES,
  volume = {15},
  number = {1},
  pages = {3:1--3:??},
  month = dec,
  year = {2009},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1640457.1640460},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:18:31 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Using voltage island methodology to reduce power
    consumption for System-on-a-Chip (SoC) designs has
    become more and more popular recently. Currently this
    approach has been considered either in system-level
    architecture or postplacement stage. Since hierarchical
    design and reusable intellectual property (IP) are
    widely used, it is necessary to optimize
    floorplanning/placement methodology considering voltage
    islands generation to solve power and critical path
    delay problems. In this article, we propose a
    floorplanning methodology considering voltage islands
    generation and performance constraints. Our method is
    flexible and can be extended to hierarchical design.
    The experimental results on some MCNC benchmarks show
    that our method is effective in meeting performance
    constraints and can simultaneously consider the
    tradeoff between power routing cost and total power
    dissipation.},
  acknowledgement = ack-nhfb,
  articleno = {3},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Cabodi:2010:SHA,
  author = {Gianpiero Cabodi and Luciano Lavagno and Marco
    Murciano and Alex Kondratyev and Yosinori Watanabe},
  title = {Speeding-up heuristic allocation, scheduling and
    binding with {SAT}-based abstraction\slash refinement
    techniques},
  journal = j-TODAES,
  volume = {15},
  number = {2},
  pages = {12:1--12:??},
  month = feb,
  year = {2010},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1698759.1698762},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:19:08 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Hardware synthesis is the process by which
    system-level, Register Transfer (RT)-level, or
    behavioral descriptions can be turned into real
    implementations, in terms of logic gates. Scheduling is
    one of the most time-consuming steps in the overall
    design flow, and may become much more complex when
    performing hardware synthesis from high-level
    specifications. Exploiting a single scheduling strategy
    on very large designs is often reductive and
    potentially inadequate. Furthermore, finding the
    ``best'' single candidate among all possible scheduling
    algorithms is practically infeasible. In this article
    we introduce a hybrid scheduling approach that is a
    preliminary step towards a comprehensive solution not
    yet provided by industrial or by academic solutions.
    Our method relies on an abstract symbolic
    representation of data flow nodes (operations) bound to
    control flow paths: it produces a more realistic lower
    bound during the prescheduling resource estimation step
    and speeds up slower but accurate heuristic scheduling
    techniques, thus achieving a globally improved
    result.},
  acknowledgement = ack-nhfb,
  articleno = {12},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {allocation; binding; High level synthesis; resource
    estimation; satisfiability; scheduling},
}
@article{Chang:2010:CPA,
  author = {Naehyuck Chang and J{\"o}rg Henkel},
  title = {Call for papers: {ACM Transactions on Design
    Automation of Electronic Systems (TODAES)} special
    section on low-power electronics and design},
  journal = j-TODAES,
  volume = {15},
  number = {2},
  pages = {20:1--20:??},
  month = feb,
  year = {2010},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1698759.1698770},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:19:08 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno = {20},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Das:2010:TAM,
  author = {Dipankar Das and P. P. Chakrabarti and Rajeev Kumar},
  title = {Thermal analysis of multiprocessor {SoC} applications
    by simulation and verification},
  journal = j-TODAES,
  volume = {15},
  number = {2},
  pages = {15:1--15:??},
  month = feb,
  year = {2010},
  coden = {ATASFO},
  doi = {https://doi.org/10.1145/1698759.1698765},
  issn = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l = {1084-4309},
  bibdate = {Mon Mar 15 11:19:08 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Overheating of computer chips leads to degradation of
    performance and reliability. Therefore, preventing
    chips from overheating in spite of increased
    performance requirements has emerged as a major
    challenge. Since the cost of cooling has been rising
    steadily, various architecture and application design
    techniques are used to prevent chip overheating.
    Temperature-aware task scheduling has emerged as an
    important application design methodology for addressing
    this problem in multiprocessor SoC systems.\par
    In this work we present the formulation and
    implementation of a method for analyzing the thermal
    (chip heating) behavior of a MPSoC task schedule,
    during the early stages of the design. We highlight the
    challenges in developing such a framework and propose
    solutions for tackling them. Due to nondeterminism in
    task execution times and decision branches,
    multiprocessor applications cannot be evaluated
    accurately by the current state-of-the-art {\em
    thermal\/} {\em simulation\/} and {\em steady-state\/}
    analysis methods. Hence an analysis covering
    nondeterministic execution behaviors is required for
    thermal analysis of MPSoC task schedules. To address
    this issue we propose a model checking-based approach
    for solving the thermal analysis problem and formulate
    it as a hybrid automata reachability verification
    problem. We present an algorithm for constructing this
    hybrid automata given the task schedule, a set of power
    profiles of tasks, and the Compact Thermal Model (CTM)
    of the chip. Information about task power consumption
    is inferred from Markov chains which are learned from
    power profiles of tasks, obtained from simulation or
    emulation runs. A numerical analysis-based algorithm
    which uses CounterExample-Guided Abstraction Refinement
    (CEGAR) is developed for reachability analysis of this
    hybrid automata. We propose a directed simulation
    methodology which uses results of a time-bounded
    analysis of the hybrid automata modeling thermal
    behavior of the application, to simulate the expected
    worst-case execution runs of the same. The algorithms
    presented in this work have been implemented in a
    prototype tool called {\em HeatCheck}. We present
    experimental results and analysis of thermal behavior
    of a set of task schedules executing on a MPSoC
    system.},
  acknowledgement = ack-nhfb,
  articleno = {15},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {chip temperature; hybrid automata; Markov chain;
    multiprocessor system-on-chip; Thermal analysis},
}
@Article{Jamieson:2010:BER,
author = "Peter Jamieson and Tobias Becker and Peter Y. K.
Cheung and Wayne Luk and Tero Rissa and Teemu
Pitk{\"a}nen",
title = "Benchmarking and evaluating reconfigurable
architectures targeting the mobile domain",
journal = j-TODAES,
volume = "15",
number = "2",
pages = "14:1--14:??",
month = feb,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1698759.1698764",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:19:08 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present the GroundHog 2009 benchmarking suite that
evaluates the power consumption of reconfigurable
technology for applications targeting the mobile
computing domain. This benchmark suite includes seven
designs; one design targets fine-grained FPGA fabrics
allowing for quick state-of-the-art evaluation, and six
designs are specified at a high level allowing them to
target a range of existing and future reconfigurable
technologies. Each of the six designs can be stimulated
with the help of synthetically generated input stimuli
created by an open-source tool included in the
downloadable suite. Another tool is included to help
verify the correctness of each implemented design. To
demonstrate the potential of this benchmark suite, we
evaluate the power consumption of two modern industrial
FPGAs targeting the mobile domain. Also, we show how an
academic FPGA framework, VPR 5.0, that has been updated
for power estimates can be used to estimate the power
consumption of different FPGA architectures and an
open-source CAD flow mapping to these architectures.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "benchmark; Benchmarking; FPGAs; mobile; power",
}
@Article{Kurimoto:2010:PAE,
author = "Masanori Kurimoto and Hiroaki Suzuki and Rei Akiyama
and Tadao Yamanaka and Haruyuki Ohkuma and Hidehiro
Takata and Hirofumi Shinohara",
title = "Phase-adjustable error detection flip-flops with
2-stage hold-driven optimization, slack-based grouping
scheme and slack distribution control for dynamic
voltage scaling",
journal = j-TODAES,
volume = "15",
number = "2",
pages = "17:1--17:??",
month = feb,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1698759.1698767",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:19:08 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "For Dynamic Voltage Scaling (DVS), we propose a novel
design methodology. This methodology is composed of an
error detection circuit and three technologies to
reduce the area and power penalties which are the large
issues for the conventional DVS with error detection.
The proposed circuit, Phase-Adjustable Error Detection
Flip-Flop (PEDFF), adjusts the clock phase of an
additional FF for the timing error detection, based on
the timing slack. 2-Stage Hold-Driven Optimization
(2-SHDO) technology splits the hold-driven optimization
in two stages. Slack-Based Grouping Scheme (SBGS)
technology divides each timing path into appropriate
groups based on the timing slack. Slack Distribution
Control (SDC) technology improves the sharp
distribution of the path delay at which the logic
synthesis tool has relaxed the delay. We evaluate the
methodology by simulating a 32-bit microprocessor in 90
nm CMOS technology. The proposed methodology reduces
the energy consumption by 19.8\% compared to non-DVS.
The OR-tree's latency is shortened to 16.3\% compared
to the conventional DVS. The area and power penalties
for delay buffers on short paths are reduced to 35.0\%
and 40.6\% compared to the conventional DVS,
respectively. The proposed methodology with SDC reduces
the energy consumption by 17.0\% on another example
with the sharp slack distribution by the logic
synthesis compared to non-DVS.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "CTS; DVS; Error detection flip-flop; P{\&}R; STA",
}
@Article{Kwon:2010:SPC,
author = "Seongnam Kwon and Soonhoi Ha",
title = "Serialized parallel code generation framework for
{MPSoC}",
journal = j-TODAES,
volume = "15",
number = "2",
pages = "11:1--11:??",
month = feb,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1698759.1698761",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:19:08 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The models of computations that express concurrency
naturally are preferred for initial specification of
MPSoC system, since popular programming languages such
as C and C++ are designed for sequential execution. In
our previous work, we proposed a design framework where
two models are used for the initial specification of
the system behavior; task model at the top level and
dataflow model inside each task. After the partition
and mapping process is performed with each architecture
candidate, the target code is automatically generated
for both Design-Space Exploration (DSE) and final
implementation. In this article, we focus on parallel
code generation for MPSoC, proposing two main
techniques. The first is to express functional and data
parallelism differently following the partition and
mapping decision. In the proposed technique, the
generated code consists of multiple tasks running
concurrently, which achieves functional parallelism. On
the other hand, we use OpenMP directives to express
data parallelism inside a task. Second is to adopt the
code serialization technique to execute a multitasking
application without OS scheduler, aiming to generate
the highly portable code on various platforms for an
efficient DSE process. We extend the previous code
serialization techniques to multiprocessor systems and
utilize the formal properties of the dataflow model for
efficient code generation. The experiments including
H.263 codec example show the viability of the proposed
technique and the efficiency of the generated code.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design-space exploration; Embedded software;
multiprocessor system on chip; parallel programming;
software generation",
}
@Article{Li:2010:PAL,
author = "Duo Li and Sheldon X.-D. Tan and Eduardo H. Pacheco
and Murli Tirumala",
title = "Parameterized architecture-level dynamic thermal
models for multicore microprocessors",
journal = j-TODAES,
volume = "15",
number = "2",
pages = "16:1--16:??",
month = feb,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1698759.1698766",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:19:08 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we propose a new architecture-level
parameterized dynamic thermal behavioral modeling
algorithm for emerging thermal-related design and
optimization problems for high-performance multicore
microprocessor design. We propose a new approach,
called {\em ParThermPOF}, to build the parameterized
thermal performance models from the given accurate
architecture thermal and power information. The new
method can include a number of variable parameters such
as the locations of thermal sensors in a heat sink,
different components (heat sink, heat spreader, core,
cache, etc.), thermal conductivity of heat sink
materials, etc. The method consists of two steps:
first, a response surface method based on low-order
polynomials is applied to build the parameterized
models at each time point for all the given sampling
nodes in the parameter space. Second, an improved
Generalized Pencil-Of-Function (GPOF) method is
employed to build the transfer-function-based
behavioral models for each time-varying coefficient of
the polynomials generated in the first step.
Experimental results on a practical quad-core
microprocessor show that the generated parameterized
thermal model matches the given data very well. The
compact models by ParThermPOF offer two orders of
magnitude speedup over the commercial thermal analysis
tool {\em FloTHERM\/} on the given examples.
ParThermPOF is very suitable for design space
exploration and optimization where both time and system
parameters need to be considered.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "architecture; behavioral modeling;
chip-multiprocessor; Multicore; thermal modeling",
}
@Article{Paul:2010:LOC,
author = "Somnath Paul and Hamid Mahmoodi and Swarup Bhunia",
title = "Low-overhead {$ F_{\hbox {max}} $} calibration at
multiple operating points using delay-sensitivity-based
path selection",
journal = j-TODAES,
volume = "15",
number = "2",
pages = "19:1--19:??",
month = feb,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1698759.1698769",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:19:08 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Maximum operating frequency ({\em F\/}$_{{\em max \/
}}$) of a system often needs to be determined at
multiple operating points, defined by voltage and
temperatures. Such calibration is important for the
speed binning process, where the voltage-frequency
(V-{\em F\/}$_{{\em max \/ }}$) relation needs to be
accurately determined to sort chips into different bins
that can be used for different applications. Moreover,
adaptive systems typically require {\em F\/}$_{{\em max
\/ }}$ calibration at multiple operating points in
order to dynamically change operating condition such as
supply voltage or body bias for power, temperature, or
throughput management. For example, a Dynamic Voltage
and Frequency Scaling (DVFS) system requires accurate
delay calibration at multiple operating voltages in
order to apply the correct operating frequency
corresponding to a scaled supply. In this article, we
propose a low-overhead design technique that allows
efficient characterization of {\em F\/}$_{{\em max \/
}}$ at different operating voltages and temperatures.
The proposed method selects a set of representative
timing paths in a circuit based on their temperature
and voltage sensitivities and dynamically configures
them into a ring oscillator to compute the critical
path delay. Compared to existing {\em F\/}$_{{\em max
\/ }}$ calibration approaches, the proposed approach
provides the following two main advantages: (1) it
introduces a delay sensitivity metric to isolate few
representative timing paths; (2) it considers actual
timing paths instead of critical path replicas, thereby
accounting for local within-die delay variations. The
all-digital calibration method is robust under process
variations and achieves high delay estimation accuracy
($<$ 4\% error) at the cost of negligible design overhead
(1.7\% in delay, 0.3\% in power, and 3.5\% in
die-area).",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "F max calibration; frequency binning; temperature
adaptation",
}
@Article{Reviriego:2010:RAM,
author = "Pedro Reviriego and Juan Antonio Maestro and Chris J.
Bleakley",
title = "Reliability analysis of memories protected with {BICS}
and a per-word parity bit",
journal = j-TODAES,
volume = "15",
number = "2",
pages = "18:1--18:??",
month = feb,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1698759.1698768",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:19:08 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents an analysis of the reliability
of memories protected with Built-in Current Sensors
(BICS) and a per-word parity bit when exposed to Single
Event Upsets (SEUs). Reliability is characterized by
Mean Time to Failure (MTTF) for which two analytic
models are proposed. A simple model, similar to the one
traditionally used for memories protected with
scrubbing, is proposed for the low error rate case. A
more complex Markov model is proposed for the high
error rate case. The accuracy of the models is checked
using a wide set of simulations. The results presented
in this article allow fast estimation of MTTF enabling
design of optimal memory configurations to meet
specified MTTF goals at minimum cost. Additionally the
power consumption of memories protected with BICS is
compared to that of memories using scrubbing in terms
of the number of read cycles needed in both
configurations.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "built-in current sensors; Error correcting codes;
Fault-tolerant memory; high-level protection
technique",
}
@Article{Schirner:2010:FAP,
author = "Gunar Schirner and Andreas Gerstlauer and Rainer
D{\"o}mer",
title = "Fast and accurate processor models for efficient
{MPSoC} design",
journal = j-TODAES,
volume = "15",
number = "2",
pages = "10:1--10:??",
month = feb,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1698759.1698760",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:19:08 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With growing system complexity and ever-increasing
software content, the development of embedded software
for upcoming MPSoC architectures is a tremendous
challenge. Traditional ISS-based validation becomes
infeasible due to the large complexity.\par
Addressing the need for flexible and fast simulating
models, we introduce in this article our approach of
abstract processor modeling in the context of
multiprocessor architectures. We combine modeling of
computation on processors with an abstract RTOS and
accurate interrupt handling into a versatile,
multifaceted processor model with several levels of
features.\par
Our processor models are utilized in a framework
allowing designers to develop a system in a top-down
manner using automatic model generation and compilation
down to a given MPSoC architecture. During generation,
instances of our processor models are integrated into a
system model combining software, hardware, and bus
communication. The generated system model serves for
rapid design space exploration and a fast and accurate
system validation.\par
Our experimental results show the benefits of our
processor modeling using an actual multiprocessor
mobile phone baseband platform. Our abstract models of
this complex system reach a simulation speed of
300MCycles/s within a high accuracy of less than 3\%
error. In addition, our results quantify the
speed/accuracy trade-off at varying abstraction levels
of our models to guide future processor model
designers.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "MPSoC; multi-processor system-on-chip; performance
prediction/estimation; Processor modeling; system-level
design; TLM; transaction-level model",
}
@Article{Yuan:2010:HSP,
author = "Mingxuan Yuan and Zonghua Gu and Xiuqiang He and Xue
Liu and Lei Jiang",
title = "Hardware\slash software partitioning and pipelined
scheduling on runtime reconfigurable {FPGAs}",
journal = j-TODAES,
volume = "15",
number = "2",
pages = "13:1--13:??",
month = feb,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1698759.1698763",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Mar 15 11:19:08 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "FPGAs are widely used in today's embedded systems
design due to their low cost, high performance, and
reconfigurability. Partially RunTime-Reconfigurable
(PRTR) FPGAs, such as Virtex-2 Pro and Virtex-4 from
Xilinx, allow part of the FPGA area to be reconfigured
while the remainder continues to operate without
interruption, so that HW tasks can be placed and
removed dynamically at runtime. We address two problems
related to HW task scheduling on PRTR FPGAs: (1) HW/SW
partitioning. Given an application in the form of a
task graph with known execution times on the HW (FPGA)
and SW (CPU), and known area sizes on the FPGA, find a
valid allocation of tasks to either HW or SW and a
static schedule with the optimization objective of
minimizing the total schedule length (makespan). (2)
Pipelined scheduling. Given an input task graph,
construct a pipelined schedule on a PRTR FPGA with the
goal of maximizing system throughput while meeting a
given end-to-end deadline. Both problems are NP-hard.
Satisfiability Modulo Theories (SMT) is an extension to
SAT by adding the ability to handle arithmetic and
other decidable theories. We use the SMT solver Yices
with Linear Integer Arithmetic (LIA) theory as the
optimization engine for solving the two scheduling
problems. In addition, we present an efficient
heuristic algorithm based on kernel recognition for the
pipelined scheduling problem, a technique borrowed from
SW pipelining, to overcome the scalability problem of
the SMT-based optimal solution technique.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "HW/SW partitioning; runtime reconfigurable FPGA;
scheduling",
}
@Article{Blanc:2010:RAS,
author = "Nicolas Blanc and Daniel Kroening",
title = "Race analysis for {SystemC} using model checking",
journal = j-TODAES,
volume = "15",
number = "3",
pages = "21:1--21:??",
month = may,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1754405.1754406",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jun 21 17:21:11 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "SystemC is a system-level modeling language that
offers a wide range of features to describe concurrent
systems at different levels of abstraction. The SystemC
standard permits simulators to implement a
deterministic scheduling policy, which often hides
concurrency-related design flaws. We present a novel
compiler for SystemC that integrates a very precise
formal race analysis by means of model checking. Our
compiler produces a simulator that uses the outcome of
the analysis to perform partial order reduction. The
key insight to make the model checking engine scale is
to apply it only to tiny fractions of the SystemC
model. We show that the outcome of the analysis is not
only valuable to eliminate redundant context switches
at runtime, but can also be used to diagnose race
conditions statically. In particular, our analysis is
able to reveal races that can remain undetected during
simulation and is able to formally prove the absence of
races.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "formal analysis; model checking; partial-order
reduction; simulation; SystemC",
}
@Article{Ahmed:2010:CBP,
author = "Waseem Ahmed and Douglas Myers",
title = "Concept-based partitioning for large multidomain
multifunctional embedded systems",
journal = j-TODAES,
volume = "15",
number = "3",
pages = "22:1--22:??",
month = may,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1754405.1754407",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jun 21 17:21:11 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Hardware-software partitioning is an important phase
in embedded systems. Decisions made during this phase
impact the quality, cost, performance, and the delivery
date of the final product. Over the past decade or
more, various partitioning approaches have been
proposed. A majority operate at a relatively fine
granularity and use a low-level executable
specification as the starting point. This presents
problems if the context is families of industrial
products with frequent release of upgraded or new
members. Managing complexity using a low-level
specification is extremely challenging and impacts
developer productivity. Designing using a high-level
specification and component-based development, although
a better option, imposes component integration and
replacement problems during system evolution and new
product release. A new approach termed Concept-Based
Partitioning is presented that focuses on system
evolution, product lines, and large-scale reuse when
partitioning. Beginning with information from UML 2.0
sequence diagrams and a concept repository, concepts are
identified and used as the unit of partitioning within
a specification. A methodology for the refinement of
interpart communication in the system specification
using sequence diagrams is also presented. Change
localization during system evolution, composability
during large-scale reuse, and provision for
configurable feature variations for a product line are
facilitated by a Generic Adaptive Layer (GAL) around
selected concepts. The methodology was applied on a
subsystem of an Unmanned Aerial Vehicle (UAV) using
various concepts which improved the composability of
concepts while keeping performance and size overhead
within the 2\% range.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Codesign; embedded system design; product families;
system evolution; system partitioning; UML",
}
@Article{Raval:2010:LPT,
author = "R. K. Raval and C. H. Fernandez and C. J. Bleakley",
title = "Low-power {TinyOS} tuned processor platform for
wireless sensor network motes",
journal = j-TODAES,
volume = "15",
number = "3",
pages = "23:1--23:??",
month = may,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1754405.1754408",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jun 21 17:21:11 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article we describe a low-power processor
platform for use in Wireless Sensor Network (WSN) nodes
(motes). WSN motes are small, battery-powered devices
comprised of a processor, sensors, and a radio
frequency transceiver. It is expected that WSNs
consisting of large numbers of motes will offer
long-term, distributed monitoring, and control of
real-world equipment and phenomena. A key requirement
for these applications is long battery life. We
investigate a processor platform architecture based on
an application-specific programmable processor core,
System-On-Chip bus, and a hardware accelerator. The
architecture improves on the energy consumption of a
conventional microprocessor design by tuning the
architecture for a suite of TinyOS-based WSN
applications. The tuning method used minimizes changes
to the instruction set architecture facilitating rapid
software migration to the new platform. The processor
platform was implemented and validated in an FPGA-based
WSN mote. The benefits of the approach in terms of
energy consumption are estimated to be a reduction of
48\% for ASIC implementation relative to a conventional
programmable processor for a typical TinyOS application
suite without use of voltage scaling.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Embedded system design; hardware-software codesign;
low power processor; Wireless Sensor Network",
}
@Article{Guan:2010:RFP,
author = "Xuan Guan and Yunsi Fei",
title = "Register file partitioning and recompilation for
register file power reduction",
journal = j-TODAES,
volume = "15",
number = "3",
pages = "24:1--24:??",
month = may,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1754405.1754409",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jun 21 17:21:11 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Register files in modern embedded processors
contribute a substantial budget in the energy
consumption due to their large switching capacitance
and long working time. For some embedded processors, on
average 25\% of registers account for 83\% of register
file accessing time. This motivates us to partition the
register file into hot and cold regions, with the most
frequently used registers placed in the hot region, and
the rarely accessed ones in the cold region. We employ
the bit-line splitting and drowsy register cell
techniques to reduce the overall register file
accessing power. We propose a novel approach to
partition the register in a way that can achieve the
largest power saving. We formulate the register file
partitioning process into a graph partitioning problem,
and apply an effective algorithm to obtain the optimal
result. We evaluate our algorithm for MiBench and
SPEC2000 applications on the SimpleScalar PISA system,
and an average saving of 58.3\% and 54.4\% over the
nonpartitioned register file accessing power is
achieved. The area overhead is negligible, and the
execution time overhead is acceptable (5.5\% for
MiBench, 2.4\% for SPEC2000). Further evaluation for
MiBench applications is performed on Alpha and X86
systems.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "compilers; Low-power design; processor architectures;
register file partitioning",
}
@Article{Zhang:2010:CSD,
author = "Yufu Zhang and Ankur Srivastava and Mohamed Zahran",
title = "On-chip sensor-driven efficient thermal profile
estimation algorithms",
journal = j-TODAES,
volume = "15",
number = "3",
pages = "25:1--25:??",
month = may,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1754405.1754410",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jun 21 17:21:11 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article addresses the problem of chip-level
thermal profile estimation using runtime temperature
sensor readings. We address the challenges of: (a)
availability of only a few thermal sensors with
constrained locations (sensors cannot be placed just
anywhere); (b) random chip power density
characteristics due to unpredictable workloads and
fabrication variability. Firstly we model the random
power density as a probability density function. Given
such statistical characteristics and the runtime
thermal sensor readings, we exploit the correlation in
power dissipation among different chip modules to
estimate the expected value of temperature at each chip
location. Our methods are optimal if the underlying
power density has Gaussian nature. We give a heuristic
method to estimate the chip-level thermal profile when
the underlying randomness is non-Gaussian. An extension
of our method has also been proposed to address the
dynamic case. Several speedup strategies are carefully
investigated to improve the efficiency of the
estimation algorithm. Experimental results indicated
that, given only a few thermal sensors, our method can
generate highly accurate chip-level thermal profile
estimates within a few milliseconds.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "estimation; on-chip sensor; statistical; Thermal
profile",
}
@Article{Chang:2010:LSC,
author = "Kai-Hui Chang and Valeria Bertacco and Igor L. Markov
and Alan Mishchenko",
title = "Logic synthesis and circuit customization using
extensive external don't-cares",
journal = j-TODAES,
volume = "15",
number = "3",
pages = "26:1--26:??",
month = may,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1754405.1754411",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jun 21 17:21:11 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Traditional digital circuit synthesis flows start from
an HDL behavioral definition and assume that circuit
functions are almost completely defined, making
don't-care conditions rare. However, recent design
methodologies do not always satisfy these assumptions.
For instance, third-party IP blocks used in a
system-on-chip are often overdesigned for the
requirements at hand. By focusing only on the input
combinations occurring in a specific application, one
could resynthesize the system to greatly reduce its
area and power consumption. Therefore we extend modern
digital synthesis with a novel technique, called SWEDE,
that makes use of extensive external don't-cares. In
addition, we utilize such don't-cares present
implicitly in existing simulation-based verification
environments for circuit customization. Experiments
indicate that SWEDE scales to large ICs with
half-million input vectors and handles practical cases
well.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "Circuit customization; don't-care optimization; logic
synthesis",
}
@Article{Liu:2010:ECR,
author = "Shenghua Liu and Guoqiang Chen and Tom Tong Jing and
Lei He and Robi Dutta and Xian-Long Hong",
title = "Effective congestion reduction for {IC} package
substrate routing",
journal = j-TODAES,
volume = "15",
number = "3",
pages = "27:1--27:??",
month = may,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1754405.1754412",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jun 21 17:21:11 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Off-chip substrate routing for high-density packages
is challenging due to requirements such as high
density, lack of vertical detour, non-Manhattan
routing, and primarily planar routing. The existing
substrate routing algorithms often result in a large
number of unrouted nets that have to be routed
manually. This article develops an effective yet
efficient diffusion-driven method D-Router to reduce
congestion. Starting with an initial routing, we
develop an effective diffusion-based congestion
reduction. We iteratively find a congested window and
spread out connections to reduce congestion inside the
window by a simulated diffusion process based on the
duality between congestion and concentration. The
window is released after the congestion is eliminated.
Compared with the state-of-the-art substrate routing
method that leads to 480 nets unrouted for ten
industrial designs with a total of 6415 nets, the
D-Router reduces the amount of unrouted nets to 104, a
reduction to the 4.6 multiple. In addition, the
D-Router obtains a similar reduction on unrouted nets
but runs up to 94 times faster when compared with a
negotiation-based substrate routing.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "congestion reduction; IC package; routability;
routing; substrate",
}
@Article{Shin:2010:PGC,
author = "Youngsoo Shin and Jun Seomun and Kyu-Myung Choi and
Takayasu Sakurai",
title = "Power gating: {Circuits}, design methodologies, and
best practice for standard-cell {VLSI} designs",
journal = j-TODAES,
volume = "15",
number = "4",
pages = "28:1--28:??",
month = sep,
year = "2010",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1835420.1835421",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Oct 6 09:42:42 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power Gating has become one of the most widely used
circuit design techniques for reducing leakage current.
Its concept is very simple, but its application to
standard-cell VLSI designs involves many careful
considerations. The great complexity of designing a
power-gated circuit originates from the side effects of
inserting current switches, which have to be resolved
by a combination of extra circuitry and customized
tools and methodologies. In this tutorial we survey
these design considerations and look at the best
practice within industry and academia. Topics include
output isolation and data retention, current switch
design and sizing, and physical design issues such as
power networks, increases in area and wirelength, and
power grid analysis. Designers can benefit from this
tutorial by obtaining a better understanding of
implications of power gating during an early stage of
VLSI designs. We also review the ways in which power
gating has been improved. These include reducing the
sizes of switches, cutting transition delays, applying
power gating to smaller blocks of circuitry, and
reducing the energy dissipated in mode transitions.
Power Gating has also been combined with other circuit
techniques, and these hybrids are also reviewed.
Important open problems are identified as a stimulus to
research.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "design methodology; leakage current; low power; Power
gating; standard-cell; VLSI",
}
@Article{Yu:2010:PSA,
  author =       "Cheng-Juei Yu and Yi-Hsin Wu and Sheng-De Wang",
  title =        "An in-place search algorithm for the resource
                 constrained scheduling problem during high-level
                 synthesis",
  journal =      j-TODAES,
  volume =       "15",
  number =       "4",
  pages =        "29:1--29:??",
  month =        sep,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1835420.1835422",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 6 09:42:42 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose an in-place search algorithm for computing
                 the exact solutions to the resource constrained
                 scheduling problem. This algorithm supports operation
                 chaining, pipelining and multicycling in the underlying
                 scheduling problem. Based on two lower-bound estimation
                 mechanisms that are capable of predicting the criterion
                 values of search nodes represented by partially
                 scheduled data flow graphs, the proposed algorithm can
                 effectively prune the nonpromising search space and
                 finds the optimum usually several times faster than
                 existing techniques. As opposed to existing
                 search-based scheduling techniques whose space
                 complexity is squared or exponential in the search
                 depth, our approach requires only a constant storage
                 space during the traversal of the search tree. The low
                 space complexity is accomplished by using a
                 combination-generating algorithm, which leads our
                 approach to visit search nodes in such a way that each
                 one is obtained by making only a small change to its
                 sibling without keeping any parent nodes in memory.
                 Experimental results on several well known benchmarks
                 with varying resource constraints show the
                 effectiveness of the proposed algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "design automation; exact scheduling; high-level
                 synthesis; optimal scheduling; resource-constrained
                 scheduling",
}
@Article{Lee:2010:PTP,
  author =       "Kyoungwoo Lee and Aviral Shrivastava and Nikil Dutt
                 and Nalini Venkatasubramanian",
  title =        "Partitioning techniques for partially protected caches
                 in resource-constrained embedded systems",
  journal =      j-TODAES,
  volume =       "15",
  number =       "4",
  pages =        "30:1--30:??",
  month =        sep,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1835420.1835423",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 6 09:42:42 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Increasing exponentially with technology scaling, the
                 soft error rate even in earth-bound embedded systems
                 manufactured in deep subnanometer technology is
                 projected to become a serious design consideration.
                 Partially protected cache (PPC) is a promising
                 microarchitectural feature to mitigate failures due to
                 soft errors in power, performance, and cost sensitive
                 embedded processors. A processor with PPC maintains two
                 caches, one protected and the other unprotected, both
                 at the same level of memory hierarchy. The intuition
                 behind PPCs is that not all data in the application is
                 equally prone to soft errors. By finding and mapping
                 the data that is more prone to soft errors to the
                 protected cache, and error-resilient data to the
                 unprotected cache, failures induced by soft errors can
                 be significantly reduced at a minimal power and
                 performance penalty. Consequently, the effectiveness of
                 PPCs critically hinges on the compiler's ability to
                 partition application data into error-prone and
                 error-resilient data. The effectiveness of PPCs has
                 previously been demonstrated on multimedia applications
                 --- where an obvious partitioning of data exists, the
                 multimedia data is inherently resilient to soft errors,
                 and the rest of the data and the entire code is assumed
                 to be error-prone. Since the amount of multimedia data
                 is a quite significant component of the entire
                 application data, this obvious partitioning is quite
                 effective. However, no such obvious data and code
                 partitioning exists for general applications. This
                 severely restricts the applicability of PPCs to data
                 caches and instruction caches in general. This article
                 investigates vulnerability-based partitioning schemes
                 that are applicable to applications in general and
                 effectively reduce failures due to soft errors at
                 minimal power and performance overheads.\par
                 Our experimental results on an HP iPAQ-like processor
                 enhanced with PPC architecture, running benchmarks from
                 the MiBench suite demonstrate that our partitioning
                 heuristic efficiently finds page partitions for data
                 PPCs that can reduce the failure rate by 48\% at only
                 2\% performance and 7\% energy overhead, and finds page
                 partitions for instruction PPCs that reduce the failure
                 rate by 50\% at only 2\% performance and 8\% energy
                 overhead, on average.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "embedded systems; page partitioning technique;
                 partially protected cache; soft error; vulnerability",
}
@Article{Bonny:2010:HBC,
  author =       "Talal Bonny and J{\"o}rg Henkel",
  title =        "{Huffman}-based code compression techniques for
                 embedded processors",
  journal =      j-TODAES,
  volume =       "15",
  number =       "4",
  pages =        "31:1--31:??",
  month =        sep,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1835420.1835424",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 6 09:42:42 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The size of embedded software is increasing at a rapid
                 pace. It is often challenging and time consuming to fit
                 an amount of required software functionality within a
                 given hardware resource budget. Code compression is a
                 means to alleviate the problem by providing substantial
                 savings in terms of code size. In this article we
                 introduce a novel and efficient hardware-supported
                 compression technique that is based on Huffman Coding.
                 Our technique reduces the size of the generated
                 decoding table, which takes a large portion of the
                 memory. It combines our previous techniques,
                 Instruction Splitting Technique and Instruction
                 Re-encoding Technique into new one called Combined
                 Compression Technique to improve the final compression
                 ratio by taking advantage of both previous techniques.
                 The instruction Splitting Technique is instruction set
                 architecture (ISA)-independent. It splits the
                 instructions into portions of varying size (called
                 patterns) before Huffman coding is applied. This
                 technique improves the final compression ratio by more
                 than 20\% compared to other known schemes based on
                 Huffman Coding. The average compression ratios achieved
                 using this technique are 48\% and 50\% for ARM and
                 MIPS, respectively. The Instruction Re-encoding
                 Technique is ISA-dependent. It investigates the
                 benefits of reencoding unused bits (we call them
                 reencodable bits) in the instruction format for a
                 specific application to improve the compression ratio.
                 Reencoding those bits can reduce the size of decoding
                 tables by up to 40\%. Using this technique, we improve
                 the final compression ratios in comparison to the first
                 technique to 46\% and 45\% for ARM and MIPS,
                 respectively (including all overhead that incurs). The
                 Combined Compression Technique improves the compression
                 ratio to 45\% and 42\% for ARM and MIPS, respectively.
                 In our compression technique, we have conducted
                 evaluations using a representative set of applications
                 and we have applied each technique to two major
                 embedded processor architectures, namely ARM and
                 MIPS.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "code compression; code density; embedded systems;
                 Huffman coding",
}
@Article{Li:2010:CPG,
  author =       "Zhifang Li and Wenjian Luo and Lihua Yue and Xufa
                 Wang",
  title =        "On the completeness of the polymorphic gate set",
  journal =      j-TODAES,
  volume =       "15",
  number =       "4",
  pages =        "32:1--32:??",
  month =        sep,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1835420.1835425",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 6 09:42:42 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Polymorphic gates are special kinds of logic gates
                 that can exhibit different functions under the control
                 of environmental parameters, such as light,
                 temperature, and VDD. These polymorphic gates can be
                 used to build polymorphic circuits that perform
                 different functions under different environments.
                 Because polymorphic gates are different from
                 traditional logic gates, the existent completeness
                 theory for the traditional logic gate set is not
                 suitable for the polymorphic gate set. So far, only the
                 definition of the complete polymorphic gate set is
                 given. There is no approach to judging whether a given
                 polymorphic gate set is complete. The contributions of
                 this article include three aspects. First, the impact
                 of logic-1 and logic-0 on the completeness of the
                 polymorphic gate set is discussed. Second, the theory
                 and two related algorithms for judging the completeness
                 of polymorphic gate sets with two modes are given.
                 Finally, the theory and related algorithms for complete
                 polymorphic gate sets with more than two modes are
                 proposed.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "completeness theory; polymorphic circuit; polymorphic
                 electronics; polymorphic gate",
}
@Article{Wang:2010:CDF,
  author =       "Renshen Wang and Evangeline Young and Chung-Kuan
                 Cheng",
  title =        "Complexity of {$3$-D} floorplans by analysis of graph
                 cuboidal dual hardness",
  journal =      j-TODAES,
  volume =       "15",
  number =       "4",
  pages =        "33:1--33:??",
  month =        sep,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1835420.1835426",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed Oct 6 09:42:42 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Interconnect dominated electronic design stimulates a
                 demand for developing circuits on the third dimension,
                 leading to 3-D integration. Recent advances in chip
                 fabrication technology enable 3-D circuit
                 manufacturing. However, there is still a possible
                 barrier of design complexity in exploiting 3-D
                 technologies. This article discusses the impact of
                 migrating from 2-D to 3-D on the difficulty of
                 floorplanning and placement. By looking at a basic
                 formulation of the graph cuboidal dual problem, we show
                 that the 3-D cases and the 3-layer 2.5-D cases are
                 fundamentally more difficult than the 2-D cases in
                 terms of computational complexity. By comparison among
                 these cases, the intrinsic complexity in 3-D floorplan
                 structures is revealed in the hard-to-decide relations
                 between topological connections and geometrical
                 contacts. The results show possible challenges in the
                 future for physical design and CAD of 3-D integrated
                 circuits.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords =     "3-D integration; cuboidal dual; floorplanning;
                 hardness",
}
@Article{Chang:2010:GEC,
  author =       "Naehyuck Chang and J{\"o}rg Henkel",
  title =        "Guest Editorial: Current Trends in Low-Power Design",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "1:1--1:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870110",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bol:2010:NME,
  author =       "David Bol and Denis Flandre and Jean-Didier Legat",
  title =        "Nanometer {MOSFET} Effects on the Minimum-Energy Point
                 of Sub-45nm Subthreshold Logic---Mitigation at
                 Technology and Circuit Levels",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "2:1--2:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870111",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Subthreshold operation of digital circuits enables
                 minimum energy consumption. In this article, we observe
                 that minimum energy Emin of subthreshold logic
                 dramatically increases when reaching 45nm CMOS node. We
                 demonstrate by circuit simulation and analytical
                 modeling that this increase comes from the combined
                 effects of variability, gate leakage, and Drain-Induced
                 Barrier Lowering (DIBL) effect. We then investigate the
                 new impact of individual MOSFET parameters Lg, Vt, and
                 Tox on Emin in sub-45nm technologies. We further
                 propose an optimum MOSFET selection, which favors
                 low-Vt mid-Lg devices in 45nm CMOS technology. The use
                 of such optimum MOSFETs yields 35\% Emin reduction for
                 a benchmark multiplier with good speed performances and
                 negligible area overhead.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Calimera:2010:NAC,
  author =       "Andrea Calimera and Enrico Macii and Massimo Poncino",
  title =        "{NBTI}-Aware Clustered Power Gating",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "3:1--3:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870112",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The emergence of Negative Bias Temperature Instability
                 (NBTI) as the most relevant source of reliability in
                 sub-90nm technologies has led to a new facet of the
                 traditional trade-off between power and reliability.
                 NBTI effects in fact manifest themselves as an increase
                 of the propagation delay of the devices over time,
                 which adds up to the delay penalty incurred by most
                 low-power design solutions. This implies that, given a
                 desired lifetime of a circuit (i.e., a given
                 performance target at some point in time), a
                 power-managed component will fail earlier than a
                 nonpower-managed one. In this work, we show how it is
                 possible to partially overcome this conflict, by
                 leveraging the benefits in terms of aging provided by
                 power-gating (i.e., by using switches that disconnect a
                 logic block from the ground).",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cong:2010:BLO,
  author =       "Jason Cong and Bin Liu and Rupak Majumdar and Zhiru
                 Zhang",
  title =        "Behavior-Level Observability Analysis for Operation
                 Gating in Low-Power Behavioral Synthesis",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "4:1--4:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870113",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Many techniques for power reduction in advanced RTL
                 synthesis tools rely explicitly or implicitly on
                 observability don't-care conditions. In this article we
                 propose a systematic approach to maximize the
                 effectiveness of these techniques by generating
                 power-friendly RTL descriptions in behavioral
                 synthesis. This is done using operation gating, that
                 is, explicitly adding a predicate to an operation based
                 on its observability condition, so that the operation,
                 once identified as unobservable at runtime, can be
                 avoided using RTL power optimization techniques such as
                 clock gating. We first introduce the concept of
                 behavior-level observability and its approximations in
                 the context of behavioral synthesis. We then propose an
                 efficient procedure to compute an approximated
                 behavior-level observability of every operation in a
                 dataflow graph.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Thorolfsson:2010:LPH,
  author =       "Thorlindur Thorolfsson and Samson Melamed and W. Rhett
                 Davis and Paul D. Franzon",
  title =        "Low-Power Hypercube Divided Memory {FFT} Engine Using
                 {$3$D} Integration",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "5:1--5:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870114",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article we demonstrate a floating point FFT
                 processor that leverages both 3D integration and a
                 unique hypercube memory division scheme to reduce the
                 power consumption of a 1024 point FFT down to 4.227$
                 \mu $J. The hypercube memory division scheme lowers the
                 energy per memory access by 59.2\% and increases the
                 total required area by 16.8\%. The use of 3D
                 integration reduces the logic power by 5.2\%. We
                 describe the tool flow required to realize the 3D
                 implementation and perform a thermal analysis of it.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dhiman:2010:VSE,
  author =       "Gaurav Dhiman and Giacomo Marchetti and Tajana
                 Rosing",
  title =        "{vGreen}: a System for Energy-Efficient Management of
                 Virtual Machines",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "6:1--6:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870115",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we present vGreen, a multitiered
                 software system for energy-efficient virtual machine
                 management in a clustered virtualized environment. The
                 system leverages the use of novel hierarchical metrics
                 that work across the different abstractions in a
                 virtualized environment to capture power and
                 performance characteristics of both the virtual and
                 physical machines. These characteristics are then used
                 to implement policies for scheduling and power
                 management of virtual machines across the cluster.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2010:EEP,
  author =       "Jinsik Kim and Pai H. Chou",
  title =        "Energy-Efficient Progressive Remote Update for
                 Flash-Based Firmware of Networked Embedded Systems",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "7:1--7:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870116",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Firmware update over a network connection is an
                 essential but expensive feature for many embedded
                 systems due to not only the relatively high power
                 consumption and limited bandwidth, but also
                 page-granular erasure before rewriting to flash memory.
                 This work proposes a page-level, link-time technique
                 that minimizes not only the size of patching scripts
                 but also perturbation to the firmware memory, over the
                 entire sequence of updates in the system's lifetime. We
                 propose a tool that first clusters functions to
                 minimize caller-callee dependency across pages, and
                 then orders the functions within each page to minimize
                 intrapage perturbation. Experimental results show our
                 technique to reduce the energy consumption of firmware
                 update by 30--42\% over the state-of-the-art.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yu:2010:EPE,
  author =       "Chenjie Yu and Peter Petrov",
  title =        "Energy- and Performance-Efficient Communication
                 Framework for Embedded {MPSoCs} through
                 Application-Driven Release Consistency",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "8:1--8:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870117",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We present a framework for performance-, bandwidth-,
                 and energy-efficient intercore communication in
                 embedded MultiProcessor Systems-on-a-Chip (MPSoC). The
                 methodology seamlessly integrates compiler, operating
                 system, and hardware support to achieve a low-cost
                 communication between synchronized producers and
                 consumers. The technique is especially beneficial for
                 data-streaming applications exploiting pipeline
                 parallelism with computational phases mapped to
                 separate cores. Code transformations utilizing a simple
                 ISA support ensure that producer writes are propagated
                 to consumers with a single interconnect transaction per
                 cache block just prior to the producer exiting its
                 synchronization region. Furthermore, in order to
                 completely eliminate misses to shared data caused by
                 interference with private data and also to minimize the
                 cache energy, we integrate to the proposed framework a
                 cache way partitioning policy based on a simple cache
                 configurability support, which isolates the shared
                 buffers from other cache traffic.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jayakumar:2010:SIV,
  author =       "Nikhil Jayakumar and Sunil P. Khatri",
  title =        "A Simultaneous Input Vector Control and Circuit
                 Modification Technique to Reduce Leakage with Zero
                 Delay Penalty",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "9:1--9:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870118",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Leakage power currently comprises a large fraction of
                 the total power consumption of an IC. Techniques to
                 minimize leakage have been researched widely. However,
                 most approaches to reducing leakage have an associated
                 performance penalty. In this article, we present an
                 approach which minimizes leakage by simultaneously
                 modifying the circuit while deriving the input vector
                 that minimizes leakage. In our approach, we selectively
                 modify a gate so that its output (in sleep mode) is in
                 a state which helps minimize the leakage of other gates
                 in its transitive fanout. Gate replacement is performed
                 in a slack-aware manner, to minimize the resulting
                 delay penalty.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2010:SCR,
  author =       "Yu-Ze Wu and Mango C.-T. Chao",
  title =        "Scan-Cell Reordering for Minimizing Scan-Shift Power
                 Based on Nonspecified Test Cubes",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "10:1--10:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870119",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article presents several scan-cell reordering
                 techniques to reduce the signal transitions during the
                 test mode while preserving the don't-care bits in the
                 test patterns for a later optimization. Combined with a
                 pattern-filling technique, the proposed scan-cell
                 reordering techniques can utilize both high response
                 correlations and pattern correlations to simultaneously
                 minimize scan-out and scan-in transitions. Those
                 scan-shift transitions can be further reduced by
                 selectively using the inverse connections between scan
                 cells. In addition, the trade-off between routing
                 overhead and power consumption can also be controlled
                 by the proposed scan-cell reordering techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Singh:2010:AJE,
  author =       "Montek Singh and Steven M. Nowick",
  title =        "{ACM Journal on Emerging Technologies in Computing
                 Systems}",
  journal =      j-TODAES,
  volume =       "16",
  number =       "1",
  pages =        "11:1--11:??",
  month =        nov,
  year =         "2010",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1870109.1870120",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Dec 9 11:12:21 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pedram:2011:CPV,
  author =       "Massoud Pedram",
  title =        "Call for papers: Verification issue and challenges
                 with multicore systems",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "12:1--12:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929944",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bernasconi:2011:DRB,
  author =       "Anna Bernasconi and Valentina Ciriani",
  title =        "Dimension-reducible {Boolean} functions based on
                 affine spaces",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "13:1--13:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929945",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We define and study a new class of regular Boolean
                 functions called D-reducible. A D-reducible function,
                 depending on all its n input variables, can be studied
                 and synthesized in a space of dimension strictly
                 smaller than n. We show that the D-reducibility
                 property can be efficiently tested, in time polynomial
                 in the representation of f, that is, an initial SOP
                 form of f. A D-reducible function can be efficiently
                 decomposed, giving rise to a new logic form, that we
                 have called DredSOP. This form is shown here to be
                 generally smaller than the corresponding minimum SOP
                 form.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2011:OAE,
  author =       "Yi Wang and Hui Liu and Duo Liu and Zhiwei Qin and
                 Zili Shao and Edwin H.-M. Sha",
  title =        "Overhead-aware energy optimization for real-time
                 streaming applications on multiprocessor
                 {System-on-Chip}",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "14:1--14:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929946",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we focus on solving the energy
                 optimization problem for real-time streaming
                 applications on multiprocessor System-on-Chip by
                 combining task-level coarse-grained software pipelining
                 with DVS (Dynamic Voltage Scaling) and DPM (Dynamic
                 Power Management) considering transition overhead,
                 inter-core communication and discrete voltage levels.
                 We propose a two-phase approach to solve the problem.
                 In the first phase, we propose a coarse-grained task
                 parallelization algorithm called RDAG to transform a
                 periodic dependent task graph into a set of independent
                 tasks by exploiting the periodic feature of streaming
                 applications. In the second phase, we propose a
                 scheduling algorithm, GeneS, to optimize energy
                 consumption. GeneS is a genetic algorithm that can
                 search and find the best schedule within the solution
                 space generated by gene evolution.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cong:2011:AMP,
  author =       "Jason Cong and Wei Jiang and Bin Liu and Yi Zou",
  title =        "Automatic memory partitioning and scheduling for
                 throughput and power optimization",
  journal =      j-TODAES,
  volume =       "16",
  number =       "2",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2011",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/1929943.1929947",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 1 16:07:45 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Memory bottleneck has become a limiting factor in
                 satisfying the explosive demands on performance and
                 cost in modern embedded system design. Selected
                 computation kernels for acceleration are usually
                 captured by nest loops, which are optimized by
                 state-of-the-art techniques like loop tiling and loop
                 pipelining. However, memory bandwidth bottlenecks
                 prevent designs from reaching optimal throughput with
                 respect to available parallelism. In this paper we
                 present an automatic memory partitioning technique
                 which can efficiently improve throughput and reduce
                 energy consumption of pipelined loop kernels for given
                 throughput constraints and platform requirements. Also,
                 our proposed algorithm can handle general array access
                 beyond affine array references.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yan:2011:MUT,
author = "Guihai Yan and Yinhe Han and Hui Liu and Xiaoyao Liang
and Xiaowei Li",
title = "{MicroFix}: Using timing interpolation and delay
sensors for power reduction",
journal = j-TODAES,
volume = "16",
number = "2",
pages = "16:1--16:??",
month = mar,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1929943.1929948",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 1 16:07:45 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Traditional DVFS schemes are oblivious to fine-grained
adaptability resulting from path-grained timing
imbalance. With the awareness of such fine-grained
adaptability, better power-performance efficiency can
be obtained. We propose a new scheme, MicroFix, to
exploit such fine-grained adaptability. We first show
the potential resulted from the path-grained timing
imbalance and then present a new technique, Timing
Interpolation, to reap the fine-grained adaptability
for power reduction. Moreover, to eliminate the
conservative margins of traditional DVFS, unlike the
previous approaches such as Razor that reactively
handle the delay errors (induced by aggressively scaled
voltage/frequency) by enabling error detection and
recovery, we propose a proactive approach by error
prediction, thereby obviate the high-cost recovery
routines.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2011:RSA,
author = "Irith Pomeranz and Sudhakar M. Reddy",
title = "Reducing the switching activity of test sequences
under transparent-scan",
journal = j-TODAES,
volume = "16",
number = "2",
pages = "17:1--17:??",
month = mar,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1929943.1929949",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 1 16:07:45 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Transparent-scan is a test application scheme for scan
circuits. It provides unique opportunities for test
compaction that do not exist with the standard test
application scheme. We show that it also provides
unique opportunities for reducing the power dissipation
of a scan-based test set. After translating a standard
scan-based test set into a transparent-scan sequence,
we apply two operations for reducing the power
dissipation of the sequence. The first operation
attempts to remove a test vector that causes high power
dissipation. The second operation attempts to replace a
scan clock cycle with a functional clock cycle, or a
functional clock cycle with a scan clock cycle, in
order to reduce the power dissipation.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cauley:2011:PBC,
author = "Stephen Cauley and Venkataramanan Balakrishnan and Y.
Charlie Hu and Cheng-Kok Koh",
title = "A parallel branch-and-cut approach for detailed
placement",
journal = j-TODAES,
volume = "16",
number = "2",
pages = "18:1--18:??",
month = mar,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1929943.1929950",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 1 16:07:45 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We introduce a technique that utilizes distributing
computing resources for the efficient optimization of a
traditional physical design problem. Specifically, we
present a detailed placement strategy designed to
exploit distributed computing environments, where the
additional computing resources are employed in parallel
to improve the optimization time. A Mixed Integer
Programming (MIP) model and branch-and-cut optimization
strategy are employed to solve the standard cell
placement problem. By exploiting the problem structure,
our algorithm improves upon the solutions afforded by
existing optimization algorithms. First, an efficient
batch-branching technique can eliminate several integer
decision variables during each step of the optimization
procedure. This batch-branching scheme can be performed
serially or in parallel.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2011:GRS,
author = "Yih-Lang Li and Yu-Ning Chang and Wen-Nai Cheng",
title = "A gridless routing system with nonslicing
floorplanning-based crosstalk reduction on gridless
track assignment",
journal = j-TODAES,
volume = "16",
number = "2",
pages = "19:1--19:??",
month = mar,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1929943.1929951",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 1 16:07:45 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Track assignment, which is an intermediate stage
between global routing and detailed routing, provides a
good platform for promoting performance, and for
imposing additional constraints during routing, such as
crosstalk. Gridless track assignment (GTA) has not been
addressed in public literature. This work develops a
gridless routing system integrating a congestion-driven
global router, crosstalk-driven GTA and an enhanced
implicit connection-graph-based router. Initial
assignment is produced rapidly with a left-edge like
algorithm. Crosstalk reduction on the assignment is
then transformed to a restricted nonslicing
floorplanning problem, and a deterministic O-Tree based
algorithm is employed to reassign each net segment.
Finally, each panel is partitioned into several
subpanels, and the subpanels are reordered using branch
and bound algorithm to decrease the crosstalk
further.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2011:SBA,
author = "Yu Liu and Kaijie Wu and Ramesh Karri",
title = "Scan-based attacks on linear feedback shift register
based stream ciphers",
journal = j-TODAES,
volume = "16",
number = "2",
pages = "20:1--20:??",
month = mar,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1929943.1929952",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 1 16:07:45 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Stream cipher is an important class of encryption
algorithm that encrypts plaintext messages one bit at a
time. Various stream ciphers are deployed in wireless
telecommunication applications because they have simple
hardware circuitry, are generally fast and consume very
low power. On the other hand, scan-based
Design-for-Test (DFT) is one of the most popular
methods to test IC devices. All flip-flops in the
Design Under Test are connected to one or more scan
chains and the states of the flip-flops can be scanned
out through these chains. In this paper, we present an
attack on stream cipher implementations by determining
the scan chain structure of the Linear Feedback Shift
Registers in their implementations.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Keutzer:2011:SSM,
author = "Kurt Keutzer and Peng Li and Li Shang and Hai Zhou",
title = "A Special Section on Multicore Parallel {CAD}:
Algorithm Design and Programming",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "21:1--21:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970354",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ludwin:2011:EDP,
author = "Adrian Ludwin and Vaughn Betz",
title = "Efficient and Deterministic Parallel Placement for
{FPGAs}",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "22:1--22:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970355",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We describe a parallel simulated annealing algorithm
for FPGA placement. The algorithm proposes and
evaluates multiple moves in parallel, and has been
incorporated into Altera's Quartus II CAD system.
Across a set of 18 industrial benchmark circuits, we
achieve geometric average speedups during the quench of
2.7x and 4.0x on four and eight processors,
respectively, with individual circuits achieving
speedups of up to 3.6x and 5.9x. Over the course of the
entire anneal, we achieve speedups of up to 2.8x and
3.7x, with geometric average speedups of 2.1x and 2.4x.
Our algorithm is the first parallel placer to optimize
for criteria other than wirelength, such as critical
path length, and is one of the few deterministic
parallel placement algorithms.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Han:2011:DIT,
author = "Yiding Han and Koushik Chakraborty and Sanghamitra Roy
and Vilasita Kuntamukkala",
title = "Design and Implementation of a Throughput-Optimized
{GPU} Floorplanning Algorithm",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "23:1--23:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970356",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we propose a novel floorplanning
algorithm for GPUs. Floorplanning is an inherently
sequential algorithm, far from the typical programs
suitable for Single-Instruction Multiple-Thread
(SIMT)-style concurrency in a GPU. We propose a
fundamentally different approach of exploring the
floorplan solution space, where we evaluate concurrent
moves on a given floorplan. We illustrate several
performance optimization techniques for this algorithm
in GPUs. To improve the solution quality, we present a
comprehensive exploration of the design space,
including various techniques to adapt the annealing
approach in a GPU.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2011:GBP,
author = "Yifang Liu and Jiang Hu",
title = "{GPU}-Based Parallelization for Fast Circuit
Optimization",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "24:1--24:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970357",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The progress of GPU (Graphics Processing Unit)
technology opens a new avenue for boosting computing
power. This work is an attempt to exploit the GPU for
accelerating VLSI circuit optimization. We propose
GPU-based parallel computing techniques and apply them
on simultaneous gate sizing and threshold voltage
assignment, which is a popular method for VLSI
performance and power optimization. These techniques
include efficient task scheduling and memory
organization, all of which are aimed to fully utilize
the advantages of GPUs.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hsu:2011:MSS,
author = "Chia-Jui Hsu and Jos{\'e} Luis Pino and Shuvra S.
Bhattacharyya",
title = "Multithreaded Simulation for Synchronous Dataflow
Graphs",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "25:1--25:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970358",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "For system simulation, Synchronous DataFlow (SDF) has
been widely used as a core model of computation in
design tools for digital communication and signal
processing systems. The traditional approach for
simulating SDF graphs is to compute and execute static
schedules in single-processor desktop environments.
Nowadays, however, multicore processors are
increasingly popular desktop platforms for their
potential performance improvements through thread-level
parallelism. Without novel scheduling and simulation
techniques that explicitly explore thread-level
parallelism for executing SDF graphs, current design
tools gain only minimal performance improvements on
multicore platforms. In this article, we present a new
multithreaded simulation scheduler, called MSS, to
provide simulation runtime speedup for executing SDF
graphs on multicore processors.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liao:2011:AUB,
author = "Xiongfei Liao and Thambipillai Srikanthan",
title = "Accelerating {UNISIM}-Based Cycle-Level
Microarchitectural Simulations on Multicore Platforms",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "26:1--26:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970359",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "UNISIM has been shown to ease the development of
simulators for multi-/many-core systems. However,
UNISIM cycle-level simulations of large-scale
multiprocessor systems could be very time consuming. In
this article, we propose a systematic framework for
accelerating UNISIM cycle-level simulations on
multicore platforms. The proposed framework relies on
exploiting the fine-grained parallelism within the
simulated cycles using POSIX threads. A multithreaded
simulation engine has been devised from the
single-threaded UNISIM SystemC engine to facilitate the
exploitation of inherent parallelism. An adaptive
technique that manages the overall computation workload
by adjusting the number of threads employed at any
given time is proposed. In addition, we have introduced
a technique to balance the workloads of multithreaded
executions.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Garcia-Dopico:2011:NAV,
author = "Antonio Garc{\'\i}a-Dopico and Antonio P{\'e}rez and
Santiago Rodr{\'\i}guez and Mar{\'\i}a Isabel
Garc{\'\i}a",
title = "A New Algorithm for {VHDL} Parallel Simulation",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "27:1--27:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970360",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article proposes a new algorithm for parallel
synchronous simulation of VHDL designs to be executed
on desktop computers. Besides executing VHDL processes
in parallel, the algorithm focuses on parallelizing the
simulation kernel with special emphasis on signal
grouping while maintaining language semantics.
Synchronous approaches are the most suitable for shared
memory multiprocessor (SMP) desktop computers but may
be difficult to parallelize because of the low activity
detected in most of the designs. The degree of
parallelism is increased in this approach by performing
an exhaustive VHDL signal dependencies analysis and
avoiding any sequential phase in the simulator. VHDL
semantics impose a synchronization barrier after each
phase, that is, the process and the kernel simulation
phase, as the language definition does not allow
simultaneous execution of kernel and processes.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zeng:2011:LDP,
author = "Zhiyu Zeng and Zhuo Feng and Peng Li and Vivek Sarin",
title = "Locality-Driven Parallel Static Analysis for Power
Delivery Networks",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "28:1--28:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970361",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Large VLSI on-chip Power Delivery Networks (PDNs) are
challenging to analyze due to the sheer network
complexity. In this article, a novel parallel
partitioning-based PDN analysis approach is presented.
We use the boundary circuit responses of each partition
to divide the full grid simulation problem into a set
of independent subgrid simulation problems. Instead of
solving exact boundary circuit responses, a more
efficient scheme is proposed to provide near-exact
approximation to the boundary circuit responses by
exploiting the spatial locality of the flip-chip-type
power grids. This scheme is also used in a block-based
iterative error reduction process to achieve fast
convergence.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhu:2011:MPL,
author = "Yuhao Zhu and Bo Wang and Yangdong Deng",
title = "Massively Parallel Logic Simulation with {GPUs}",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "29:1--29:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970362",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we developed a massively parallel
gate-level logical simulator to address the
ever-increasing computing demand for VLSI verification.
To the best of the authors' knowledge, this work is the
first one to leverage the power of modern GPUs to
successfully unleash the massive parallelism of a
conservative discrete event-driven algorithm, CMB
algorithm. A novel data-parallel strategy is proposed
to manipulate the fine-grain message passing mechanism
required by the CMB protocol. To support robust and
complete simulation for real VLSI designs, we establish
both a memory paging mechanism and an adaptive issuing
strategy to efficiently utilize the GPU memory with a
limited capacity.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% NOTE(review): second author's surname restored to mixed case
%%% ("DeOrio", previously "Deorio") -- confirm against the article's
%%% title page.
@Article{Chatterjee:2011:GLS,
author = "Debapriya Chatterjee and Andrew DeOrio and Valeria
Bertacco",
title = "Gate-Level Simulation with {GPU} Computing",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "30:1--30:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970363",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Functional verification of modern digital designs is a
crucial, time-consuming task impacting not only the
correctness of the final product, but also its time to
market. At the heart of most of today's verification
efforts is logic simulation, used heavily to verify the
functional correctness of a design for a broad range of
abstraction levels. In mainstream industry verification
methodologies, typical setups coordinate the validation
effort of a complex digital system by distributing
logic simulation tasks among vast server farms for
months at a time. Yet, the performance of logic
simulation is not sufficient to satisfy the demand,
leading to incomplete validation processes, escaped
functional bugs, and continuous pressure on the EDA
industry to develop faster simulation solutions.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bondade:2011:HSC,
author = "Rajdeep Bondade and Dongsheng Ma",
title = "Hardware-Software Codesign of an Embedded
Multiple-Supply Power Management Unit for Multicore
{SoCs} Using an Adaptive Global\slash Local Power
Allocation and Processing Scheme",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "31:1--31:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970364",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power dissipation has become a critical design
constraint for the growth of modern multicore systems
due to increasing clock frequencies, leakage currents,
and system parasitics. To overcome this urgent crisis,
this article presents an embedded platform for on-chip
power management of a multicore System-on-Chip (SoC).
The design involves the development of two key
components, from the hardware to the software level.
From the hardware perspective, a multiple-supply power
management unit is proposed and is implemented using a
Single-Inductor Multiple-Output (SIMO) DC-DC converter.
To dynamically respond to the sensed instantaneous
power demands and to accurately control the power
delivery to the processor cores, the power management
unit employs a software-defined adaptive global/local
power allocation feedback controller.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Stitt:2011:TWD,
author = "Greg Stitt and Frank Vahid",
title = "Thread Warping: Dynamic and Transparent Synthesis of
Thread Accelerators",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "32:1--32:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970365",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We introduce thread warping, a dynamic optimization
technique that customizes multicore architectures to a
given application by dynamically synthesizing threads
into custom accelerator circuits on FPGAs
(Field-Programmable Gate Arrays). Thread warping builds
upon previous dynamic synthesis techniques for
single-threaded applications, enabling dynamic
architectural adaptation to different amounts of
thread-level parallelism, while also exploiting
parallelism within each thread to further improve
performance. Furthermore, thread warping maintains the
important separation of function from architecture,
enabling portability of applications to architectures
with different quantities of microprocessors and FPGAs,
an advantage not shared by static compilation/synthesis
approaches. We introduce an approach consisting of CAD
tools and operating system support that enables thread
warping on potentially any microprocessor/FPGA
architecture.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ain:2011:CPV,
author = "Antara Ain and Debjit Pal and Pallab Dasgupta and
Siddhartha Mukhopadhyay and Rajdeep Mukhopadhyay and
John Gough",
title = "{Chassis}: a Platform for Verifying {PMU} Integration
Using Autogenerated Behavioral Models",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "33:1--33:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970367",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power Management Units (PMUs) are large integrated
circuits consisting of many predesigned mixed-signal
components. PMU integration poses a serious
verification problem considering the size of the
integrated circuit and the complexity of analog
simulation. In this article we present an approach for
automatic generation of behavioral models for PMU
components from top-down skeleton models, fitted with
parameter values estimated by bottom-up parameter
extraction algorithms. It is shown that replacing PMU
components with these autogenerated hybrid
automata-based abstract behavioral models enables
significant simulation speedup ({$>$} 20X on our
industrial test cases) and helps in early detection of
integration errors. The article also justifies the
level of accuracy in our models with respect to the
goal of verifying integrated PMUs.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yu:2011:MQS,
author = "Yue Yu and Shangping Ren and Xiaobo Sharon Hu",
title = "A Metric for Quantifying Similarity between Timing
Constraint Sets in Real-Time Systems",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "34:1--34:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970368",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Real-time systems are systems in which their timing
behaviors must satisfy a specified set of timing
constraints and they often operate in a real-world
environment with scarce resources. As a result, the
actual runtime performance of these systems may deviate
from the design, either inevitably due to unpredictable
factors or by intention in order to improve system's
other Quality-of-Service (QoS) properties. In this
article, we first introduce a new metric, timing
constraint set similarity, to quantify the resemblance
between two different timing constraint sets. Because
directly calculating the exact value of the metric
involves calculating the size of a polytope which is a
\#P-hard problem, we instead introduce an efficient
method for estimating its bound.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% NOTE(review): "Tiempo Sas" removed from the author list; it appears
%%% to be the company affiliation (Tiempo SAS) of Marc Renaudin that was
%%% parsed as a personal name -- confirm against the article's title
%%% page.
@Article{Abouzeid:2011:COS,
author = "Fady Abouzeid and Sylvain Clerc and Fabian Firmin and
Marc Renaudin and Gilles Sicard",
title = "{40nm CMOS} {0.35V}-Optimized Standard Cell Libraries
for Ultra-Low Power Applications",
journal = j-TODAES,
volume = "16",
number = "3",
pages = "35:1--35:??",
month = jun,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/1970353.1970369",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 14 11:55:50 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Ultra-low voltage is now a well-known solution for
energy constrained applications designed using
nanometric process technologies. This work is focused
on setting up an automated methodology to enable the
design of ultra-low voltage digital circuits
exclusively using standard EDA tools. To achieve this
goal, a 0.35V energy-delay optimized library was
developed. This library, fully compliant with standard
library design flow and characterization, was verified
through the design and fabrication of a BCH decoder
circuit, following a standard front-end to back-end
flow. At 0.33V, it performs at 600 kHz with a dynamic
energy consumption reduced by a factor 14x from nominal
1.1V.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Qiu:2011:ATB,
author = "Meikang Qiu and Edwin H.-M. Sha",
title = "2011 {ACM} {TODAES} best paper award",
journal = j-TODAES,
volume = "16",
number = "4",
pages = "36:1--36:??",
month = oct,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2003695.2003696",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Oct 22 09:25:48 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In high-level synthesis for real-time embedded systems
using heterogeneous functional units (FUs), it is
critical to select the best FU type for each task.
However, some tasks may not have fixed execution times.
This article models each varied execution time as a
probabilistic random variable and solves the
heterogeneous assignment with probability (HAP)
problem. The solution of the HAP problem assigns a
proper FU type to each task such that the total cost is
minimized while the timing constraint is satisfied with
a guaranteed confidence probability. The solutions to
the HAP problem are useful for both hard real-time and
soft real-time systems.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sen:2011:COV,
  author = {Alper Sen},
  title = {Concurrency-oriented verification and coverage of
    system-level designs},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {37:1--37:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003697},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Correct concurrent System-on-Chips (SoCs) are very
    hard to design and reason about. In this work, we
    develop an automated framework complete with
    concurrency-oriented verification and coverage
    techniques for system-level designs. Our techniques are
    different from traditional simulation-based reliability
    techniques, since concurrency information is often lost
    in traditional techniques. We preserve concurrency
    information to obtain unique verification techniques
    that allow us to predict potential errors (formulated
    as transaction-level assertions) from error-free
    simulations. In order to do this, we exploit the
    inherent concurrency in the designs to generate and
    analyze novel partial-order simulation traces.
    Additionally, to evaluate the confidence on
    verification results and the gauge progress of
    verification, we develop novel mutation testing based
    on concurrent coverage metrics.},
  acknowledgement = ack-nhfb,
  articleno = {37},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Fournier:2011:PAC,
  author = {Laurent Fournier and Avi Ziv and Ekaterina Kutsy and
    Ofer Strichman},
  title = {A probabilistic analysis of coverage methods},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {38:1--38:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003698},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Coverage is an important measure for the quality and
    completeness of the functional verification of hardware
    logic designs. Verification teams spend a significant
    amount of time looking for bugs in the design and in
    providing high-quality coverage. This process is
    performed through the use of various sampling
    strategies for selecting test inputs. The selection of
    sampling strategies to achieve the verification goals
    is typically carried out in an intuitive manner. We
    studied several commonly used sampling strategies and
    provide a probabilistic framework for assessing and
    comparing their relative values. For this analysis, we
    derived results for two measures of interest: first,
    the probability of finding a bug within a given number
    of samplings; and second, the expected number of
    samplings until a bug is detected.},
  acknowledgement = ack-nhfb,
  articleno = {38},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Sun:2011:GDD,
  author = {Wei-Tsun Sun and Zoran Salcic},
  title = {{GALS-Designer}: a design framework for {GALS}
    software systems},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {39:1--39:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003699},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {GALS-Designer is a framework for the design of
    software systems which comply with the formal Globally
    Asynchronous Locally Synchronous model of computation
    (GALS). Those systems consist of single or multiple
    GALS programs and their immediate environment, which
    can be other programs and any other modules described
    in SystemC. The framework integrates our libGALS
    library for writing GALS programs and SystemC. It
    enables modeling and simulation of single and multiple
    GALS programs within the single SystemC executable
    model on the host (simulation) operating system. The
    same GALS programs can then be run without SystemC on a
    target operating system for which the libGALS runtime
    library is available.},
  acknowledgement = ack-nhfb,
  articleno = {39},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Mittal:2011:TVA,
  author = {Kartikey Mittal and Arpit Joshi and Madhu Mutyam},
  title = {Timing variation-aware scheduling and resource binding
    in high-level synthesis},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {40:1--40:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003700},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Due to technological scaling, process variations have
    increased significantly, resulting in large variations
    in the delay of the functional units. Hence, the
    worst-case approach is becoming increasingly
    pessimistic in meeting a certain performance yield. The
    problem therefore is to increase the performance as
    much as possible while maintaining the desired yield.
    In this work, we introduce an integer linear
    programming (ILP) formulation for scheduling and
    resource binding in high-level synthesis (HLS) which
    tries to mitigate the effect of timing variations. In
    the presence of delay variations of resources, as
    chained resources can give a better latency and
    performance yield trade-off, instead of considering
    them independently, we consider external chaining of
    resources, that is, two or more resources are connected
    by external wiring, and exploit operation chaining.},
  acknowledgement = ack-nhfb,
  articleno = {40},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Wang:2011:RCM,
  author = {Xiaofang Wang and Pallav Gupta},
  title = {Resource-constrained multiprocessor synthesis for
    floating-point applications on {FPGAs}},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {41:1--41:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003701},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Although state-of-the-art field-programmable gate
    arrays offer exciting new opportunities in exploring
    low-cost high-performance architectures for
    data-intensive scientific applications, they also
    present serious challenges.
    Multiprocessor-on-programmable-chip, which integrates
    software programmability and hardware reconfiguration,
    provides substantial flexibility that results in
    shorter design cycles, higher performance, and lower
    cost. In this article, we present an
    application-specific design methodology for
    multiprocessor-on-programmable-chip architectures that
    target applications involving large matrices and
    floating-point operations. Given an application with
    specific energy-performance and resource constraints,
    our methodology aims to customize the architecture to
    match the diverse computation and communication
    requirements of the application tasks. Graph-based
    analysis of the application drives system synthesis
    that employs a precharacterized, parameterized hardware
    component library of functional units.},
  acknowledgement = ack-nhfb,
  articleno = {41},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Kim:2011:MAO,
  author = {Yongjoo Kim and Jongeun Lee and Aviral Shrivastava and
    Yunheung Paek},
  title = {Memory access optimization in compilation for
    coarse-grained reconfigurable architectures},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {42:1--42:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003702},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Coarse-grained reconfigurable architectures (CGRAs)
    promise high performance at high power efficiency. They
    fulfil this promise by keeping the hardware extremely
    simple, and moving the complexity to application
    mapping. One major challenge comes in the form of data
    mapping. For reasons of power-efficiency and
    complexity, CGRAs use multibank local memory, and a row
    of PEs share memory access. In order for each row of
    the PEs to access any memory bank, there is a hardware
    arbiter between the memory requests generated by the
    PEs and the banks of the local memory. However, a
    fundamental restriction remains in that a bank cannot
    be accessed by two different PEs at the same time.},
  acknowledgement = ack-nhfb,
  articleno = {42},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Bruneel:2011:DDF,
  author = {Karel Bruneel and Wim Heirman and Dirk Stroobandt},
  title = {Dynamic data folding with parameterizable {FPGA}
    configurations},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {43:1--43:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003703},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In many applications, subsequent data manipulations
    differ only in a small set of parameter values. Because
    of their reconfigurability, FPGAs (field programmable
    gate arrays) can be configured with a specialized
    circuit each time the parameter values change. This
    technique is called dynamic data folding. The
    specialized circuits are smaller and faster than their
    generic counterparts. However, the overhead involved in
    generating the configurations for the specialized
    circuits at runtime is very large when conventional
    tools are used, and this overhead will in many cases
    negate the benefit of using optimized configurations.
    This article introduces an automatic method for
    generating runtime parameterizable configurations from
    arbitrary Boolean circuits.},
  acknowledgement = ack-nhfb,
  articleno = {43},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Dong:2011:PCS,
  author = {Wei Dong and Peng Li},
  title = {Parallel circuit simulation with adaptively controlled
    projective integration},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {44:1--44:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003704},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article, a parallel transient circuit
    simulation approach based on an adaptively-controlled
    time-stepping scheme is proposed. Different from the
    widely-used implicit numerical integration techniques
    in most transient simulators, this work exploits the
    recently-developed explicit telescopic projective
    numerical integration method for efficient parallel
    circuit simulation. Because telescopic projective
    integration addresses the well-known stability issue of
    explicit numerical integrations by adopting
    combinations of inner integrators and outer integrators
    in a multilevel fashion, the simulation time-step is no
    longer limited by the smallest time constant in the
    circuit. With dynamic control of telescopic projective
    integration, the proposed projective integration
    framework not only leads to noticeable efficiency
    improvement in circuit simulation, it also lends itself
    to straightforward parallelization due to its explicit
    nature.},
  acknowledgement = ack-nhfb,
  articleno = {44},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Maestro:2011:MEL,
  author = {Juan Antonio Maestro and Pedro Reviriego and Sanghyeon
    Baeg and Shijie Wen and Richard Wong},
  title = {Mitigating the effects of large multiple cell upsets
    {(MCUs)} in memories},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {45:1--45:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003705},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Reliability is a critical issue for memories.
    Radiation particles that hit the device can cause
    errors in some cells, which can lead to data
    corruption. To avoid this problem, memories are
    protected with per-word error correction codes (ECCs).
    Typically, single-error correction and double-error
    detection (SEC-DED) codes are used. As technology
    scales, errors caused by radiation particles on
    memories tend to affect more than one cell---what is
    known as a multiple cell upset (MCU). To ensure that
    only a single cell is affected in each word,
    interleaving is used. With interleaving, cells that
    belong to the same word are placed at a sufficient
    distance such that an MCU will only affect a single
    cell on each word.},
  acknowledgement = ack-nhfb,
  articleno = {45},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Healy:2011:IMF,
  author = {Michael B. Healy and Fayez Mohamood and Hsien-Hsin S.
    Lee and Sung Kyu Lim},
  title = {Integrated microarchitectural floorplanning and
    run-time controller for inductive noise mitigation},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {46:1--46:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003706},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article, we propose a design methodology using
    two complementary techniques to address high-frequency
    inductive noise in the early design phase of a
    microprocessor. First, we propose a noise-aware
    floorplanning technique that uses microarchitectural
    profile information to create noise-aware floorplans.
    Second, we present the design of a dynamic
    inductive-noise controlling mechanism at the
    microarchitectural level, which limits the on-die
    current demand within predefined bounds, regardless of
    the native power and current characteristics of running
    applications. By dynamically monitoring the access
    patterns of microarchitectural modules, our mechanism
    can effectively limit simultaneous switching activity
    of close-by modules, thereby leveling voltage ringing
    at local power-pins.},
  acknowledgement = ack-nhfb,
  articleno = {46},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% The abstract states quadratic running-time bounds for both the
%%% assignment and the routing algorithm; they are typeset in math mode.
@Article{Yan:2011:ICA,
author = "Jin-Tai Yan",
title = "{IO} connection assignment and {RDL} routing for
flip-chip designs",
journal = j-TODAES,
volume = "16",
number = "4",
pages = "47:1--47:??",
month = oct,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2003695.2003707",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Oct 22 09:25:48 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Given a set of IO buffers and a set of bump balls with
the capacity constraints between two adjacent bump
balls, based on the construction of the Delaunay
triangulation and a Manhattan Voronoi diagram, an $ O(n^2) $
assignment algorithm is proposed to assign all the IO
connections in a single redistribution layer for IO
connection assignment, where $n$ is the number of bump
balls in a flip-chip design. Furthermore, based on the
computation of the probabilistic congestion for the
assigned IO connections, an $ O(n^2) $ routing algorithm is
proposed to minimize the total wirelength to route all
the assigned IO connections while satisfying the
capacity constraints for single-layer RDL routing.",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% The underlined letters in the abstract spell out the algorithm
%%% acronyms DLE-3D, DME-3D, and NN-3D.
@Article{Kim:2011:CTS,
author = "Tak-Yung Kim and Taewhan Kim",
title = "Clock tree synthesis for {TSV}-based {$3$D} {IC}
designs",
journal = j-TODAES,
volume = "16",
number = "4",
pages = "48:1--48:??",
month = oct,
year = "2011",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2003695.2003708",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Oct 22 09:25:48 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "For the cost-effective implementation of clock trees
in through-silicon via (TSV)-based 3D IC designs, we
propose core algorithms for 3D clock tree synthesis.
For a given abstract tree topology, we propose DLE-3D
(\underline{d}eferred \underline{l}ayer
\underline{e}mbedding for \underline{3D} ICs), which
optimally finds the embedding layers of tree nodes, so
that the TSV cost required for a tree topology is
minimized, and DME-3D (\underline{d}eferred
\underline{m}erge \underline{e}mbedding for
\underline{3D} ICs), which is an extended algorithm of
the 2D merging segment, to minimize the total
wirelength in 3D design space, with the consideration
of the TSV effect on delay. In addition, when an
abstract tree topology is not given, we propose NN-3D
(\underline{n}earest \underline{n}eighbor selection for
\underline{3D} ICs), which constructs a (TSV and
wirelength) cost-effective abstract tree topology for
3D ICs.",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lu:2011:CBP,
  author = {Jianchao Lu and Baris Taskin},
  title = {Clock buffer polarity assignment with skew tuning},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {49:1--49:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003709},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {A clock polarity assignment method is proposed that
    reduces the peak current on the vdd/gnd rails of an
    integrated circuit. The impacts of (i) the output
    capacitive load on the peak current drawn by the
    sink-level clock buffers, and (ii) the buffer/inverter
    replacement scheme of polarity assignment on timing
    accuracy are considered in the formulation. The
    proposed sink-level-only polarity assignment is
    performed by a lexi-search algorithm in order to
    balance the peak current on the clock tree. Most of the
    previous polarity assignment methods that do not
    include clock tree resynthesis lead to an undesirable
    increase in the worst corner clock skew.},
  acknowledgement = ack-nhfb,
  articleno = {49},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Wang:2011:ALR,
  author = {Shaoxi Wang and Xinzhang Jia and Arthur B. Yeh and
    Lihong Zhang},
  title = {Analog layout retargeting using geometric
    programming},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {50:1--50:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003710},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {To satisfy the requirements of complex and special
    analog layout constraints, a new analog layout
    retargeting method is presented in this article. Our
    approach uses geometric programming (GP) to achieve new
    technology design rules, implement device symmetry and
    matching constraints, and manage parasitics
    optimization. The GP, a class of nonlinear optimization
    problem, can be transferred or fitted into a convex
    optimization problem. Therefore, a global optimum
    solution can be achieved. Moreover, the GP can address
    problems with large-scale variables and constraints
    without setting an initialization variable range. To
    meet the prerequisites of the GP methodology for analog
    layout automation, we propose three kinds of
    mathematical transformations, including negative
    coefficient transformation, fraction transformation,
    and maximum of posynomial transformation.},
  acknowledgement = ack-nhfb,
  articleno = {50},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Duarte:2011:HDP,
  author = {Filipa Duarte and Jos Hulzink and Jun Zhou and Jan
    Stuijt and Jos Huisken and Harmke {De Groot}},
  title = {A {36$ \mu $W} heartbeat-detection processor for a
    wireless sensor node},
  journal = j-TODAES,
  volume = {16},
  number = {4},
  pages = {51:1--51:??},
  month = oct,
  year = {2011},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2003695.2003711},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Sat Oct 22 09:25:48 MDT 2011},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In order to provide better services to elderly people,
    home healthcare monitoring systems have been
    increasingly deployed. Typically, these systems are
    based on wireless sensor nodes, and should utilize very
    low energy during their lifetimes, as they are powered
    by scavengers. In this article, we present an ultra-low
    power processing system for a wireless sensor node for
    very low duty cycle applications. In the CoolBio
    system-on-chip, we utilized several power reduction
    techniques at both the architecture level and the
    circuit level. These techniques include feature
    extraction, voltage and frequency scaling, clock and
    power gating and a redesign of key standard cells.},
  acknowledgement = ack-nhfb,
  articleno = {51},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Verbeek:2012:EFS,
  author = {Freek Verbeek and Julien Schmaltz},
  title = {Easy Formal Specification and Validation of Unbounded
    {Networks-on-Chips} Architectures},
  journal = j-TODAES,
  volume = {17},
  number = {1},
  pages = {1:1--1:??},
  month = jan,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2071356.2071357},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 26 16:38:42 MST 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article presents a formal specification and
    validation environment to prove safety and liveness
    properties of parametric -- unbounded -- NoCs
    architectures described at a high-level of abstraction.
    The environment improves the GeNoC approach with two
    new theorems, proving evacuation and starvation
    freedom. The application of the validation methodology
    is illustrated on a HERMES NoC with adaptive west-first
    routing and wormhole switching. This case study
    illustrates the strong compositional aspect of the
    GeNoC environment. The complete specification of this
    HERMES instance, together with the proof that the
    specification is deadlock-free, starvation free, and
    all messages eventually leave the network at their
    correct destination, could be achieved in about a
    week.},
  acknowledgement = ack-nhfb,
  articleno = {1},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Pasha:2012:SLS,
  author = {Muhammad Adeel Pasha and Steven Derrien and Olivier
    Sentieys},
  title = {System-Level Synthesis for Wireless Sensor Node
    Controllers: a Complete Design Flow},
  journal = j-TODAES,
  volume = {17},
  number = {1},
  pages = {2:1--2:??},
  month = jan,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2071356.2071358},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 26 16:38:42 MST 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Wireless sensor networks (WSN) is a new and very
    challenging research field for embedded system design
    automation. Engineering a WSN node hardware platform is
    known to be a tough challenge, as the design must
    enforce many severe constraints, among which energy
    dissipation is by far the most important one. WSN node
    devices have until now been designed using
    off-the-shelf low-power microcontroller units (MCUs),
    even if their power dissipation is still an issue and
    hinders the widespread use of this new technology. In
    this work, we propose a complete system-level flow for
    an alternative approach based on the concept of
    hardware microtasks, which relies on hardware
    specialization and power gating to drastically improve
    the energy efficiency of the computational/control part
    of the node.},
  acknowledgement = ack-nhfb,
  articleno = {2},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Aksoy:2012:OAM,
  author = {Levent Aksoy and Eduardo Costa and Paulo Flores and
    Jose Monteiro},
  title = {Optimization Algorithms for the Multiplierless
    Realization of Linear Transforms},
  journal = j-TODAES,
  volume = {17},
  number = {1},
  pages = {3:1--3:??},
  month = jan,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2071356.2071359},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 26 16:38:42 MST 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {This article addresses the problem of finding the
    fewest numbers of addition and subtraction operations
    in the multiplication of a constant matrix with an
    input vector---a fundamental operation in many linear
    digital signal processing transforms. We first
    introduce an exact common subexpression elimination
    (CSE) algorithm that formalizes the minimization of the
    number of operations as a 0-1 integer linear
    programming problem. Since there are still instances
    that the proposed exact algorithm cannot handle due to
    the NP-completeness of the problem, we also introduce a
    CSE heuristic algorithm that iteratively finds the most
    common 2-term subexpressions with the minimum conflicts
    among the expressions.},
  acknowledgement = ack-nhfb,
  articleno = {3},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Leung:2012:PVI,
  author = {Mario K. Y. Leung and Eric K. I. Chio and Evangeline
    F. Y. Young},
  title = {Postplacement Voltage Island Generation},
  journal = j-TODAES,
  volume = {17},
  number = {1},
  pages = {4:1--4:??},
  month = jan,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2071356.2071360},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 26 16:38:42 MST 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {High power consumption will not only shorten the
    battery life of handheld devices, but also cause
    thermal and reliability problems. To lower power
    consumption, one way is to reduce the supply voltage as
    in multisupply voltage (MSV) designs. In region-based
    MSV, a circuit will be partitioned into ``voltage
    islands'' where each island occupies a contiguous
    physical space and operates at one supply voltage. In
    the work of Wu et al. [2005], this voltage supply
    problem is addressed, and the input placement is
    partitioned into a set of rectangular voltage islands
    by a slicing structure. However, the constraint of
    using a slicing structure prohibits better solutions in
    their approach.},
  acknowledgement = ack-nhfb,
  articleno = {4},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Wang:2012:CMI,
  author = {Hai Wang and Sheldon X.-D. Tan and Ryan Rakib},
  title = {Compact Modeling of Interconnect Circuits over Wide
    Frequency Band by Adaptive Complex-Valued Sampling
    Method},
  journal = j-TODAES,
  volume = {17},
  number = {1},
  pages = {5:1--5:??},
  month = jan,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2071356.2071361},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 26 16:38:42 MST 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In this article, we propose a new model
    order-reduction method for compact modeling of
    interconnect circuits over wide frequency band using a
    novel complex-valued adaptive sampling and error
    estimation scheme. We address the outstanding error
    control problems in the existing sampling-based
    reduction framework over a frequency band. Our new
    method, WBMOR, explicitly and efficiently computes the
    exact residual errors to guide the sampling process. We
    show by sampling along the imaginary axis and
    performing a new complex-valued reduction that the
    reduced model will match exactly with the original
    model at the sample points. Additionally, we show in
    theory that the proposed method can achieve the error
    bound over a given frequency range.},
  acknowledgement = ack-nhfb,
  articleno = {5},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Lin:2012:RDP,
  author = {Jing-Wei Lin and Tsung-Yi Ho and Iris Hui-Ru Jiang},
  title = {Reliability-Driven Power\slash Ground Routing for
    Analog {ICs}},
  journal = j-TODAES,
  volume = {17},
  number = {1},
  pages = {6:1--6:??},
  month = jan,
  year = {2012},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2071356.2071362},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 26 16:38:42 MST 2012},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Electromigration and voltage drop (IR-drop) are two
    major reliability issues in modern IC design.
    Electromigration gradually creates permanently open or
    short circuits due to excessive current densities;
    IR-drop causes insufficient power supply, thus
    degrading performance or even inducing functional
    errors because of nonzero wire resistance. Both types
    of failure can be triggered by insufficient wire
    widths. Although expanding the wire width alleviates
    electromigration and IR-drop, unlimited expansion not
    only increases the routing cost, but may also be
    infeasible due to the limited routing resource. In
    addition, electromigration and IR-drop manifest mainly
    in the power/ground (P/G) network. Therefore, taking
    wire widths into consideration is desirable to prevent
    electromigration and IR-drop at P/G routing.},
  acknowledgement = ack-nhfb,
  articleno = {6},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Ioannides:2012:CDT,
author = "Charalambos Ioannides and Kerstin I. Eder",
title = "Coverage-Directed Test Generation Automated by Machine
Learning --- a Review",
journal = j-TODAES,
volume = "17",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2071356.2071363",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 26 16:38:42 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The increasing complexity and size of digital designs,
in conjunction with the lack of a potent verification
methodology that can effectively cope with this trend,
continue to inspire engineers and academics in seeking
ways to further automate design verification. In an
effort to increase performance and to decrease
engineering effort, research has turned to artificial
intelligence (AI) techniques for effective solutions.
The generation of tests for simulation-based
verification can be guided by machine-learning
techniques. In fact, recent advances demonstrate that
embedding machine-learning (ML) techniques into a
coverage-directed test generation (CDG) framework can
effectively automate the test generation process,
making it more effective and less error-prone.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pan:2012:ERE,
author = "Zhaoliang Pan and Melvin A. Breuer",
title = "Error Rate Estimation for Defective Circuits via Ones
Counting",
journal = j-TODAES,
volume = "17",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2071356.2071364",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 26 16:38:42 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With VLSI circuit feature size scaling down, it is
becoming more difficult and expensive to achieve a
desired level of yield. Error-tolerance employs
defective chips that occasionally produce erroneous yet
acceptable results in targeted applications, and has
been proposed as one way to increase effective yield.
These chips are characterized by criteria set by
specific applications. Error rate, an upper-bound on
how frequently errors occur at an output, is one such
criterion. In this article we focus on the following
problem: given a combinational logic circuit that is
defective, and hence occasionally produces an erroneous
output, how can we determine the error rate of each
output line by using ones counting?",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Peng:2012:SSE,
author = "Huan-Kai Peng and Hsuan-Ming Huang and Yu-Hsin Kuo and
Charles H.-P. Wen",
title = "Statistical Soft Error Rate {(SSER)} Analysis for
Scaled {CMOS} Designs",
journal = j-TODAES,
volume = "17",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2071356.2071365",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 26 16:38:42 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article re-examines the soft error effect caused
by radiation-induced particles beyond the deep
submicron regime. Considering the impact of process
variations, voltage pulse widths of transient faults
are found no longer monotonically diminishing after
propagation, as they were formerly. As a result, the
soft error rates in scaled electronic designs escape
traditional static analysis and are seriously
underestimated. In this article we formulate the
statistical soft error rate (SSER) problem and present
two frameworks to cope with the aforementioned
sophisticated issues. The table-lookup framework
captures the change of transient-fault distributions
implicitly by using a Monte-Carlo approach, whereas the
SVR-learning framework does the task explicitly by
using statistical learning theory.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gong:2012:FNM,
author = "Fang Gong and Xuexin Liu and Hao Yu and Sheldon X. D.
Tan and Junyan Ren and Lei He",
title = "A Fast Non-{Monte-Carlo} Yield Analysis and
Optimization by Stochastic Orthogonal Polynomials",
journal = j-TODAES,
volume = "17",
number = "1",
pages = "10:1--10:??",
month = jan,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2071356.2071366",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 26 16:38:42 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Performance failure has become a significant threat to
the reliability and robustness of analog circuits. In
this article, we first develop an efficient
non-Monte-Carlo (NMC) transient mismatch analysis,
where transient response is represented by stochastic
orthogonal polynomial (SOP) expansion under PVT
variations and probabilistic distribution of transient
response is solved. We further define performance yield
and derive stochastic sensitivity for yield within the
framework of SOP, and finally develop a gradient-based
multiobjective optimization to improve yield while
satisfying other performance constraints. Extensive
experiments show that compared to Monte Carlo-based
yield estimation, our NMC method achieves up to 700X
speedup and maintains 98\% accuracy.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2012:ESF,
author = "Meng-Huan Wu and Peng-Chih Wang and Cheng-Yang Fu and
Ren-Song Tsay",
title = "An Extended {SystemC} Framework for Efficient
{HW\slash SW} Co-Simulation",
journal = j-TODAES,
volume = "17",
number = "2",
pages = "11:1--11:??",
month = apr,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2159542.2159543",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 20 17:41:41 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we propose an extended SystemC
framework that directly enables software simulation in
SystemC. Although SystemC has been widely adopted for
system-level simulation of hardware designs nowadays,
to complete HW/SW co-simulation, it still requires an
additional instruction set simulator (ISS) for software
execution. However, the heavy intercommunication
overheads between the two heterogeneous simulators
would significantly slow down simulation performance.
To deal with this issue, our proposed approach
automatically generates high-speed and equivalent
SystemC models for target software applications that
can be directly integrated with hardware models for
complete HW/SW co-simulation. In addition, to properly
handle multitasking, an efficient OS model is devised
to support accurate preemptive scheduling. Since both
the generated application model and the OS model are
constructed in SystemC modules, our approach avoids
heavy intercommunication overheads and achieves over
1,000 times faster simulation than that of the
conventional ISS-SystemC approach. Experimental results
demonstrate that our extended SystemC approach can
perform at 50 to 220 MIPS while offering accurate
simulation results.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhou:2012:ONC,
author = "Pingqiang Zhou and Ping-Hung Yuh and Sachin S.
Sapatnekar",
title = "Optimized {$3$D} Network-on-Chip Design Using
Simulated Allocation",
journal = j-TODAES,
volume = "17",
number = "2",
pages = "12:1--12:??",
month = apr,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2159542.2159544",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 20 17:41:41 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Three-dimensional (3D) silicon integration
technologies have provided new opportunities for
Network-on-Chip (NoC) architecture design in
Systems-on-Chip (SoCs). In this article, we consider
the application-specific NoC architecture design
problem in a 3D environment. We present an efficient
floorplan-aware 3D NoC synthesis algorithm based on
simulated allocation (SAL), a stochastic method for
traffic flow routing, and accurate power and delay
models for NoC components. We demonstrate that this
method finds greatly improved solutions compared to a
baseline algorithm reflecting prior work. To evaluate
the SAL method, we compare its performance with the
widely used simulated annealing (SA) method and show
that SAL is much faster than SA for this application,
while providing solutions of very similar quality. We
then extend the approach from a single-path routing to
a multipath routing scheme and explore the trade-off
between power consumption and runtime for these two
schemes. Finally, we study the impact of various
factors on the network performance in 3D NoCs,
including the TSV count and the number of 3D tiers. Our
studies show that link power and delay can be
significantly improved when moving from a 2D to a 3D
implementation, but the improvement flattens out as the
number of 3D tiers goes beyond a certain point.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sun:2012:PTA,
author = "Guangyu Sun and Huazhong Yang and Yuan Xie",
title = "Performance\slash Thermal-Aware Design of
{$3$D}-Stacked {L2} Caches for {CMPs}",
journal = j-TODAES,
volume = "17",
number = "2",
pages = "13:1--13:??",
month = apr,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2159542.2159545",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 20 17:41:41 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Three-dimensional (3D) stacking technology enables
integration of more memory on top of chip
multiprocessors (CMPs). As the number of cores and the
capacity of on-chip memory increase, the Non-Uniform
Cache Architecture (NUCA) becomes more attractive.
Compared to 2D cases, 3D stacking provides more options
for the design of on-chip memory due to numerous
advantages, such as the extra layout dimension, low
latency across layers, etc. On the other hand, 3D
stacking aggravates the thermal problem due to the
increase of power density. In this work, we first study
the design of 3D-stacked set-associative L2 caches
through managing the placement of cache ways. The
evaluation results show that the placement and
corresponding management of 3D cache ways have an
impact on the performance of CMPs. Then, we show that
the efficiency of thermal control is also related to
the placement of cache ways. For caches implemented
with different memory technologies, the placement and
management of cache ways have different effects on
power consumption and power distribution. Consequently,
we propose techniques to improve the efficiency of
thermal control for different memory technologies. The
evaluation results show the trade-off between
performance and thermal control efficiency.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2012:TAS,
author = "Chin-Hsien Wu and Hsin-Hung Lin",
title = "Timing Analysis of System Initialization and Crash
Recovery for a Segment-Based Flash Translation Layer",
journal = j-TODAES,
volume = "17",
number = "2",
pages = "14:1--14:??",
month = apr,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2159542.2159546",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 20 17:41:41 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Recently, the capacity of flash-memory storage systems
has grown rapidly, and flash-memory technology has
advanced along with the wave of consumer electronics
and embedded systems. In order to properly manage
product cost and initialization performance, vendors
face serious challenges in system design and analysis.
Thus, the timing analysis of system initialization and
crash recovery for a segment-based flash translation
layer has become an important research topic. This
article focuses on system initialization, crash
recovery, and timing analysis. The timing analysis of
system initialization involves the relationship between
the size of the main memory and the system
initialization time. The timing analysis of crash
recovery explains the worst case recovery time. The
experiments in this study show that the timing analysis
of system initialization and crash recovery can be
applied to the segment-based flash translation layer.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Milder:2012:CGH,
author = "Peter Milder and Franz Franchetti and James C. Hoe and
Markus P{\"u}schel",
title = "Computer Generation of Hardware for Linear Digital
Signal Processing Transforms",
journal = j-TODAES,
volume = "17",
number = "2",
pages = "15:1--15:??",
month = apr,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2159542.2159547",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 20 17:41:41 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Linear signal transforms such as the discrete Fourier
transform (DFT) are very widely used in digital signal
processing and other domains. Due to high performance
or efficiency requirements, these transforms are often
implemented in hardware. This implementation is
challenging due to the large number of algorithmic
options (e.g., fast Fourier transform algorithms or
FFTs), the variety of ways that a fixed algorithm can
be mapped to a sequential datapath, and the design of
the components of this datapath. The best choices
depend heavily on the resource budget and the
performance goals of the target application. Thus, it
is difficult for a designer to determine which set of
options will best meet a given set of requirements. In
this article we introduce the Spiral hardware
generation framework and system for linear transforms.
The system takes a problem specification as input as
well as directives that define characteristics of the
desired datapath. Using a mathematical language to
represent and explore transform algorithms and datapath
characteristics, the system automatically generates an
algorithm, maps it to a datapath, and outputs a
synthesizable register transfer level Verilog
description suitable for FPGA or ASIC implementation.
The quality of the generated designs rivals the best
available handwritten IP cores.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Weng:2012:TOS,
author = "Shih-Hung Weng and Yu-Min Kuo and Shih-Chieh Chang",
title = "Timing Optimization in Sequential Circuit by
Exploiting Clock-Gating Logic",
journal = j-TODAES,
volume = "17",
number = "2",
pages = "16:1--16:??",
month = apr,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2159542.2159548",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 20 17:41:41 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Clock gating is a popular technique for reducing power
dissipation. In a circuit with clock gating, the clock
signal can be shut off without changing the
functionality under certain clock-gating conditions. In
this article, we observe that the clock-gating
conditions and the next-state function of a Flip-Flop
(FF) are correlated and can be used for sequential
circuit optimization. We also show that the
implementation of the next-state function of any FF can
be just an inverter if the clock signal is
appropriately gated. By exploiting the flexibility
between the clock-gating conditions and the next-state
function, we propose an iterative optimization
algorithm to improve the timing of sequential circuits.
We present experimental results of a set of benchmark
circuits with a timing improvement of 10.20\% on
average.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kurimoto:2012:YRI,
author = "Masanori Kurimoto and Jun Matsushima and Shigeki
Ohbayashi and Yoshiaki Fukui and Michio Komoda and
Nobuhiro Tsuda",
title = "A Yield and Reliability Improvement Methodology Based
on Logic Redundant Repair with a Repairable Scan
Flip-Flop Designed by Push Rule",
journal = j-TODAES,
volume = "17",
number = "2",
pages = "17:1--17:??",
month = apr,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2159542.2159549",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 20 17:41:41 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We propose a yield improvement methodology which
repairs a faulty chip due to logic defect by using a
repairable scan flip-flop (R-SFF). Our methodology
improves area penalty, which is a large issue for logic
repair technology in actual products, by using repair
grouping and a redundant cell insertion algorithm and
by pushing the design rule for the repairable area of
R-SFF. Additionally, compared with the conventional
method, we reduce the number of wire connections around
redundant cells by improving the replacement method of
the faulty cell by the redundant cell. The proposed
methodology reduces the total area penalty caused by
the logic redundant repair to 3.6\% and improves the
yield, that is the number of good chips on a wafer, by
4.7\% when the defect density is 1.0[1/cm$^2$].
Furthermore, we propose the strategy to repair the
in-field failures due to latent defect for the chip
whose repair function had not been used in the shipment
test.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xiang:2012:SFF,
author = "Dong Xiang and Zhen Chen and Laung-Terng Wang",
title = "Scan Flip-Flop Grouping to Compress Test Data and
Compact Test Responses for Launch-on-Capture Delay
Testing",
journal = j-TODAES,
volume = "17",
number = "2",
pages = "18:1--18:??",
month = apr,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2159542.2159550",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Apr 20 17:41:41 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Test data compression is a much more difficult problem
for launch-on-capture (LOC) delay testing, because test
data for LOC delay testing is much more than that of
stuck-at fault testing, and LOC delay fault test
generation in the two-frame circuit model can specify
many more inputs. A new scan architecture is proposed
to compress test stimulus data, compact test responses,
and reduce test application time for LOC delay fault
testing. The new scan architecture merges a number of
scan flip-flops into the same group, where all scan
flip-flops in the same group are assigned the same
values for all test pairs. Sufficient conditions are
presented for including any pair of scan flip-flops
into the same group for LOC transition, non-robust path
delay, and robust path delay fault testing. Test data
for LOC delay testing based on the new scan
architecture can be compressed significantly. Test
application time can also be reduced greatly.
Sufficient conditions are presented to construct a test
response compactor for LOC transition, non-robust, and
robust path delay fault testing. Folded scan forest and
test response compactor are constructed for further
test data compression. Sufficient experimental results
are presented to show the effectiveness of the
method.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ray:2012:ISS,
author = "Sandip Ray and Jayanta Bhadra and Magdy S. Abadir and
Li-C. Wang and Aarti Gupta",
title = "Introduction to special section on verification
challenges in the concurrent world",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "19:1--19:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209292",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Verbeek:2012:TFV,
author = "Freek Verbeek and Julien Schmaltz",
title = "Towards the formal verification of cache coherency at
the architectural level",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "20:1--20:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209293",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Cache coherency is one of the major issues in
multicore systems. Formal methods, in particular
model-checking, have been successful at verifying
high-level protocols, but, to the best of our
knowledge, the verification of cache coherency at the
architectural level is still an open issue. All
existing verification efforts assume a reliable
interconnect, that is, messages eventually reach their
destination. We discuss the challenge of discharging
this assumption at the architectural level where
implementation details of the interconnect are mixed
with a cache coherency protocol. Our automatic approach
is based on a well-defined set of primitives to express
architectural models, a generic model of communication
fabrics expressed in an automated theorem proving
system, and a dedicated algorithm for deadlock and
livelock detection. We argue that reliability depends
on the interaction between the interconnect and the
cache coherency protocol. They must be verified
altogether as their combination creates intricate
message dependencies. We sketch our verification
approach and apply it to a simple write-invalidate
protocol on the Spidergon network-on-chip from
STMicroelectronics. Our approach is promising. For this
simple protocol, networks with tens of agents and
hundreds of components can be analyzed within
seconds.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Holt:2012:FLP,
author = "Jim Holt and Jaideep Dastidar and David Lindberg and
John Pape and Peng Yang",
title = "A full lifecycle performance verification methodology
for multicore systems-on-chip",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "21:1--21:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209294",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Multicore Systems-on-Chip (MCSoC) are comprised of a
rich set of processor cores, specialized hardware
accelerators, and I/O interfaces. Functional
verification of these complex designs is a critical and
demanding task; however, focusing only on functional
verification is very risky because the motivation for
building such systems in the first place is to achieve
high levels of system throughput. Therefore a
functionally correct MCSoC that does not exhibit
sufficient performance will fail in the market. In
addition, limiting performance verification efforts to
analyzing individual system components in isolation is
insufficient due to: (1) the degree of system-level
resource contention that an application domain imposes
on the MCSoC, and (2) the degree of configuration
flexibility that is typically afforded by an MCSoC.
These factors motivate system-level performance
verification of MCSoC. This article presents an
important industrial case study of MCSoC performance
verification involving both pre- and postsilicon
analysis, highlighting the methodology used, the
lessons learned, and recommendations for improvement.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Elwakil:2012:DRM,
author = "Mohamed Elwakil and Zijiang Yang",
title = "Deterministic replay for message-passing-based
concurrent programs",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "22:1--22:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209295",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The Multicore Communications API (MCAPI) is a new
message-passing API that was released by the Multicore
Association. MCAPI provides an interface designed for
closely distributed embedded systems with multiple
cores on a chip and/or chips on a board. Similar to
parallel programs in other domains, debugging MCAPI
programs is a challenging task due to their
nondeterministic behavior. In this article we present a
tool that is capable of deterministically replaying
MCAPI program executions, which provides valuable
insight for MCAPI developers in case of failure.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Deniz:2012:VCM,
author = "Etem Deniz and Alper Sen and Jim Holt",
title = "Verification and coverage of message passing multicore
applications",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "23:1--23:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209296",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We describe verification and coverage methods for
multicore software that uses message passing libraries
for communication. Specifically, we provide techniques
to improve reliability of software using the new
industry standard MCAPI by the Multicore Association.
We develop dynamic predictive verification techniques
that allow us to find actual and potential errors in a
multicore software. Some of these error types are
deadlocks, race conditions, and violation of temporal
assertions. We complement our verification techniques
with a mutation-testing-based coverage metric. Coverage
metrics enable measuring the quality of verification
tests. We implemented our techniques in tools and
validated them on several multicore programs that use
the MCAPI standard. We implement our techniques in
tools and experimentally show the effectiveness of our
approach. We find errors that are not found using
traditional dynamic verification techniques and we can
potentially explore execution schedules different than
the original program with our coverage tool. This is
the first time such predictive verification and
coverage metrics have been developed for MCAPI.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Qin:2012:DTG,
author = "Xiaoke Qin and Prabhat Mishra",
title = "Directed test generation for validation of multicore
architectures",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "24:1--24:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209297",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Functional validation is widely acknowledged as a
major challenge for multicore architectures. Directed
tests are promising since a significantly smaller
number of directed tests can achieve the same coverage
goal compared to constrained-random tests. SAT-based
bounded model checking is effective for automated
generation of directed tests (counterexamples). While
existing approaches focus on clause forwarding between
different bounds to reduce the test generation time,
this article proposes a novel technique that exploits
temporal, structural, and spatial symmetry in multicore
designs at the same time. Our proposed technique
enables the reuse of the knowledge learned from one
core to the remaining cores in multicore architectures
(structural symmetry), from one bound to the next for a
given property (temporal symmetry), as well as from one
property to other properties (spatial symmetry). The
experimental results demonstrate that our approach can
significantly (3--10 times) reduce overall test
generation time compared to existing approaches.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Singh:2012:TRT,
author = "Padmaraj Singh and Vijaykrishnan Narayanan and David
L. Landis",
title = "Targeted random test generation for power-aware
multicore designs",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "25:1--25:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209298",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Multicore Register Transfer Level (RTL) model
simulations are indispensable in exposing subtle memory
subsystem bugs. Validating memory consistency,
coherency, and atomicity is a crucial design
verification task. Random MultiProcessor (MP) test
generators play critical roles in pre- and post-silicon
validation. The Advanced Configuration and Power
Interface (ACPI) standard supports dynamic frequency
and voltage scaling by controlling performance states
(P-States), yet multicore verification is generally
conducted with cores operating at the P0-State.
Independently varying core frequencies introduces new
sets of intracore and intercore traffic latencies. The
article introduces targeted random MP test generation
techniques for multicore P-State functional
verification. It develops a simple coverage metric to
evaluate MP test effectiveness. The metric is
demonstrated on MIPS instruction-set-based random MP
tests. A novel technique is introduced to modulate the
test address space by the spherical Bessel function.
The technique delivers an order of magnitude coverage
improvement over completely random tests. The article
then outlines minimal P-State combinations to be
exercised by MP tests. It also formulates two new
methodologies to set up and apply MP tests for
effective multicore P-State coverage. The methodologies
are termed SimInit and SimTransition. First-level
analyses indicate that these methods can deliver 97\%
to 100\% improvement over random MP test coverage.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jang:2012:AAA,
author = "Wooyoung Jang and David Z. Pan",
title = "{A3MAP}: Architecture-aware analytic mapping for
networks-on-chip",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "26:1--26:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209299",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we propose novel and global
Architecture-Aware Analytic MAPping (A3MAP) algorithms
applied to Networks-on-Chip (NoCs) not only with
homogeneous Processing Elements (PEs) on a regular mesh
network as done by most previous application mapping
algorithms but also with heterogeneous PEs on an
irregular mesh or custom network. As the main
contributions, we develop a simple yet efficient
interconnection matrix that can easily model any core
graph and network. Then, an application mapping problem
is exactly formulated to Mixed Integer Quadratic
Programming (MIQP). Since MIQP is NP-hard, we propose
two effective heuristics, a successive relaxation
algorithm achieving short runtime, called A3MAP-SR and
a genetic algorithm achieving high mapping quality,
called A3MAP-GA. We also propose a partition-based
application mapping approach for large-scale NoCs,
which provides better trade-off between performance and
runtime. Experimental results show that A3MAP
algorithms reduce total hop count, compared to the
previous application mapping algorithms optimized for a
regular mesh network, called NMAP [Murali and Micheli
2004] and for an irregular mesh and custom network,
called CMAP [Tornero et al. 2008]. Furthermore, A3MAP
algorithms make packets travel shorter distance than
CMAP, which is related to energy consumption.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Foroozannejad:2012:PBM,
author = "Mohammad H. Foroozannejad and Trevor Hodges and Matin
Hashemi and Soheil Ghiasi",
title = "Postscheduling buffer management trade-offs in
streaming software synthesis",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "27:1--27:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209300",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Streaming applications, which are abundant in many
disciplines such as multimedia, networking, and signal
processing, require efficient processing of a seemingly
infinite sequence of input data. In the context of
streaming software synthesis from data flow graphs, we
study the inherent trade-off between memory requirement
and compilation runtime, under a given task firing
schedule. We utilize postscheduling analysis
granularity to control the amount of details in
characterization of buffer's spatio-temporal
footprints. Subsequently, we transform the buffer
allocation problem to two-dimensional packing of
polygons, where complexity of the packing problem
(e.g., polygon shapes) is determined by the analysis
granularity. We develop an evolutionary packing
optimization algorithm which readily yields buffer
allocations. Experimental results highlight the
trade-off between complexity of the analysis and the
total buffer size of generated implementations. In
addition, they show dramatic improvements in total
buffer size, if one is willing to pay the additional
cost in optimization runtime.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Salamy:2012:ISA,
author = "Hassan Salamy and J. Ramanujam",
title = "An {ILP} solution to address code generation for
embedded applications on digital signal processors",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "28:1--28:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209301",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Digital Signal Processors (DSPs) are a family of
embedded processors designed under tight memory, area,
and cost constraints. Many DSPs use irregular
addressing modes where base-plus-offset mode is not
supported. However, they often have Address Generation
Units (AGUs) that can perform auto-increment/decrement
address arithmetic instructions in parallel with
Load/Store instructions. This feature can be utilized
to reduce the number of explicit address arithmetic
instructions and thus reduce the embedded application
code size. This code size reduction is essential for
this family of DSP as the code usually resides in the
ROM and hence the code size directly translates into
silicon area. An effective technique for optimized code
generation is offset assignment. This is a well-used
technique in the literature to decrease the code size
by finding an offset assignment that can effectively
utilize auto-increment/decrement. This problem is known
as simple offset assignment when there is only one
address register and as General Offset Assignment (GOA)
for multiple available address registers. In this
article, we present an optimal Integer Linear
Programming (ILP) solution to the offset assignment
problem with variable coalescing where more than one
variable can share the same memory location. Variable
permutation is also formulated to find the best access
sequence to achieve the best offset assignment that
decreases the code size the most. Experimental results
on several benchmarks show the effectiveness of our
variable permutation technique as well as the large
improvement from the ILP-based solutions compared to
heuristics.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Schafer:2012:DCH,
author = "Benjamin Carrion Schafer and Kazutoshi Wakabayashi",
title = "Divide and conquer high-level synthesis design space
exploration",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "29:1--29:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209302",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A method to accelerate the Design Space Exploration
(DSE) of behavioral descriptions for high-level
synthesis based on a divide and conquer method called
Divide and Conquer Exploration Algorithm (DC-ExpA) is
presented. DC-ExpA parses an untimed behavioral
description given in C or SystemC and clusters
interdependent operations which are in turn explored
independently by inserting synthesis directives
automatically in the source code. The method then
continues by combining the exploration results to
obtain only Pareto-optimal designs. This method
accelerates the design space exploration considerably
and is compared against two previous methods: an
Adaptive Simulated Annealer Exploration Algorithm
(ASA-ExpA) that shows good optimality at high runtimes,
and a pattern matching method called Clustering Design
Space Exploration Acceleration (CDS-ExpA) that is fast
but suboptimal. Our proposed method is orthogonal to
previous exploration methods that focus on the
exploration of resource constraints, allocation,
binding, and/or scheduling. Our proposed method, on the
contrary, sets local synthesis directives that decide
upon the overall architectural structure of the design
(e.g., mapping certain arrays to memories or
registers). Results show that DC-ExpA explores the
design space on average 61\% faster than ASA-ExpA,
obtaining comparable results indicated by several
quality indicators, for example, distance to reference
Pareto-front, hypervolume, and Pareto dominance.
Compared to CDS-ExpA it is 69\% slower, but obtains
much better results compared to the same quality
indicators.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Karfa:2012:FVC,
author = "Chandan Karfa and Chittaranjan Mandal and Dipankar
Sarkar",
title = "Formal verification of code motion techniques using
data-flow-driven equivalence checking",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "30:1--30:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209303",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A formal verification method for checking correctness
of code motion techniques is presented in this article.
Finite State Machine with Datapath (FSMD) models have
been used to represent the input and the output
behaviors of each synthesis step. The method introduces
cutpoints in one FSMD, visualizes its computations as
concatenation of paths from cutpoints to cutpoints, and
then identifies equivalent finite path segments in the
other FSMD; the process is then repeated with the FSMDs
interchanged. Unlike many other reported techniques,
the method is capable of verifying both uniform and
nonuniform code motion techniques. It has been
underlined in this work that for nonuniform code
motions, identifying equivalent path segments involves
model checking of some data-flow properties. Our method
automatically identifies the situations where such
properties are needed to be checked during equivalence
checking, generates the appropriate properties, and
invokes the model checking tool NuSMV to verify them.
The correctness and the complexity of the method have
been dealt with. Experimental results demonstrate the
effectiveness of the method.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Linehan:2012:MDA,
author = "{\'E}amonn Linehan and Eamonn O'Toole and Siobh{\'a}n
Clarke",
title = "Model-driven automation for simulation-based
functional verification",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "31:1--31:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209304",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Developing testbenches for dynamic functional
verification of hardware designs is a
software-intensive process that lies on the critical
path of electronic system design. The increasing
capabilities of electronic components are contributing
to the construction of complex verification
environments that are increasingly difficult to
understand, maintain, extend, and reuse across
projects. Model-driven software engineering addresses
issues of complexity, productivity, and code quality
through the use of high-level system models and
subsequent automatic transformations. Reasoning about
verification testbench decomposition becomes simpler at
higher levels of abstraction. In particular, the
aspect-oriented paradigm, when applied at the model
level, can minimize the overlap in functionality
between modules, improving maintainability and
reusability. This article presents an aspect-oriented
model-driven engineering process and toolset for the
development of hardware verification testbenches. We
illustrate how this process and toolset supports
modularized design and automatic transformation to
verification environment-specific models and source
code through an industry case study.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Qian:2012:FPS,
author = "Haifeng Qian and Sachin S. Sapatnekar and Eren
Kursun",
title = "{Fast Poisson Solvers} for thermal analysis",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "32:1--32:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209305",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Accurate and efficient thermal analysis for a VLSI
chip is crucial, both for sign-off reliability
verification and for design-time circuit optimization.
To determine an accurate temperature profile, it is
important to simulate a die together with its thermal
mounts: this requires solving Poisson's equation on a
nonrectangular 3D domain. This article presents a class
of eigendecomposition-based Fast Poisson Solvers (FPS)
for chip-level thermal analysis. We start with a solver
that solves a rectangular 3D domain with mixed boundary
conditions in $ O(N \cdot \log N) $ time, where $N$ is
the dimension of the finite difference matrix. Then we
reveal, for the first time in the literature, a strong
relation between fast Poisson solvers and
Green-function-based methods. Finally, we propose an
FPS method that leverages the preconditioned conjugate
gradient method to solve nonrectangular 3D domains
efficiently. We demonstrate this approach on thermal
analysis of an industrial microprocessor, showing
accurate results verified by a commercial tool, and
that it solves a system of dimension 4.54e6 in only 13
conjugate gradient iterations, with a runtime of 65
seconds, a 15X speedup over the popular ICCG solver.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Guthaus:2012:HPC,
author = "Matthew R. Guthaus and Xuchu Hu and Gustavo Wilke and
Guilherme Flach and Ricardo Reis",
title = "High-performance clock mesh optimization",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "33:1--33:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209306",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Clock meshes are extremely effective at producing
low-skew regional clock networks that are tolerant of
environmental and process variations. For this reason,
clock meshes are used in most high-performance designs,
but this robustness consumes significant power. In this
work, we present two techniques to optimize
high-performance clock meshes. The first technique is a
mesh perturbation methodology for nonuniform mesh
routing. The second technique is a skew-aware buffer
placement through iterative buffer deletion. We
demonstrate how these optimizations can achieve
significant power reductions and a near elimination of
short-circuit power. In addition, the total wire length
is decreased, the number of required buffers is
decreased, and both skew and robustness are improved on
average when variation is considered.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2012:LBC,
author = "Kuan-Yu Lin and Hong-Ting Lin and Tsung-Yi Ho and
Chia-Chun Tsai",
title = "Load-balanced clock tree synthesis with adjustable
delay buffer insertion for clock skew reduction in
multiple dynamic supply voltage designs",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "34:1--34:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209307",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power consumption is known to be a crucial issue in
current IC designs. To tackle this problem, Multiple
Dynamic Supply Voltage (MDSV) designs are proposed as
an efficient solution for power savings. However, the
increasing variability of clock skew during the
switching of power modes leads to an increase in the
complication of clock skew reduction in MDSV designs.
In this article, we propose a load-balanced clock tree
synthesizer with Adjustable Delay Buffer (ADB)
insertion for clock skew reduction in MDSV designs. The
clock tree synthesizer adopts the Minimum Spanning Tree
(MST) metric to estimate the interconnect capacitance
and execute the graph-theoretic clustering. The
power-mode-guided optimization is also embedded into
the clock tree synthesizer for improving additional
area overhead in the step of ADB insertion. After
constructing the initial buffered clock tree, we insert
the ADBs with delay value assignments to reduce clock
skew in MDSV designs. The ADBs can be used to produce
additional delays, hence the clock latencies and skew
become tunable in a clock tree. An efficient algorithm
of ADB insertion for the minimization of clock skew,
area, and runtime in MDSV designs has been presented.
Comparing with the state-of-the-art algorithm of ADB
insertion, experimental results show maximum 42.40\%
area overhead improvement. With the power-mode-guided
optimization, the maximum improvement of area overhead
can increase to 47.87\%.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2012:FHA,
author = "Chien-Nan Jimmy Liu and Yen-Lung Chen and Chin-Cheng
Kuo and I-Ching Tsai",
title = "A fast heuristic approach for parametric yield
enhancement of analog designs",
journal = j-TODAES,
volume = "17",
number = "3",
pages = "35:1--35:??",
month = jun,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2209291.2209308",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jul 31 16:58:51 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In traditional yield enhancement approaches, a lot of
computation efforts have to be paid first to find the
feasible regions and the Pareto fronts, which will
become a heavy cost for large analog circuits. In order
to reduce the computation efforts, this article
proposes a fast heuristic approach that tries to finish
all iteration steps of the yield enhancement flow at
behavior level. First, a novel force-directed Nominal
Point Moving (NPM) algorithm is proposed to find a
better nominal point without building the feasible
regions. Then, an equation-based behavior-level sizing
approach is proposed to map the NPM results at
performance level to behavior-level parameters. A fast
behavior-level Monte Carlo simulation is also proposed
to shorten the iterative yield enhancement flow.
Finally, using the obtained behavioral parameters as
the sizing targets of each subblock, the device sizing
time is significantly reduced instead of sizing from
the system-level specifications directly. As
demonstrated on several analog circuits, this heuristic
approach could be another efficient methodology to help
designers improve their analog circuits toward better
yield.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Tu:2012:MFS,
author = "Chia-Heng Tu and Shih-Hao Hung and Tung-Chieh Tsai",
title = "{MCEmu}: a Framework for Software Development and
Performance Analysis of Multicore Systems",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "36:1--36:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348840",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Developing software for heterogeneous multicore
systems is particularly challenging even for
experienced developers. While emulators have proven
useful to application development, very few
heterogeneous multicore emulators have been made
available by vendors so far, as building an emulator
for a heterogeneous multicore system has been a
time-consuming and difficult task. Thus, we proposed a
framework, called MCEmu, to speed up the process of
building a heterogeneous multicore emulator by
integrating existing and/or new processor emulators.
MCEmu is designed to help system and application
development, with a basic multicore board support
package, an interprocessor communication library, and
tools for debugging, tracing, and performance
monitoring. In addition, MCEmu can run on a multicore
host system to accelerate the emulation of data
parallel applications. We show that MCEmu can be very
useful for developing system software before the system
becomes available, as it has helped us catch numerous
functional and performance bugs which could have been
hard to find. In this article, we present the design of
MCEmu and demonstrate its capabilities with our case
studies.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Alizadeh:2012:FVD,
author = "Bijan Alizadeh",
title = "Formal Verification and Debugging of Precise
Interrupts on High Performance Microprocessors",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "37:1--37:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348841",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The increased parallelism provided by Out-Of-Order
(OOO) and superscalar mechanisms has made the control
portion of advanced processors more complicated so that
the state-of-the-art formal verification techniques for
Register-Transfer-Level (RTL) and gate-level designs
cannot scale to the complexity of such complicated
processors. Moreover, verification and debugging of
exceptions and external interrupts on such processors
are nontrivial tasks. Because the exceptions arrival
time, the external interrupt arrival time, as well as
the microprocessor response time must be precise,
verification and debugging require sophisticated
hardware and software capabilities. This article
proposes techniques for effective verification and
debugging of cycle-accurate OOO processors in the event
of exceptions and external interrupts. The results show
that our techniques reduce the complexity of the
verification and debugging processes by reducing the
number of simulation cycles (3.3 $ \times $ average
reduction) and the number of state variables (8.7 $
\times $ average reduction) to be traced for localizing
bugs.",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mukherjee:2012:SAA,
author = "Subhankar Mukherjee and Pallab Dasgupta and Siddhartha
Mukhopadhyay and Scott Little and John Havlicek and
Srikanth Chandrasekaran",
title = "Synchronizing {AMS} Assertions with {AMS} Simulation:
From Theory to Practice",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "38:1--38:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348842",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The verification community anticipates the adoption of
assertions in the Analog and Mixed-Signal (AMS) domain
in the near future. Several questions need to be
answered before AMS assertions are brought into
practice, such as: (a) How will the languages for AMS
assertions be different from the ones in the digital
domain? (b) Does the analog simulator have to be
assertion aware? (c) If so, then how and where on the
time line will the AMS assertion checker synchronize
with the analog simulator? and (d) What will be the
performance penalty for monitoring AMS assertions
accurately over analog simulation? This article
attempts to answer these questions through theoretical
analysis and empirical results obtained from industrial
test cases. We study logics which extend Linear
Temporal Logic (LTL) with predicates over real
variables, and show that further extensions allowing
the binding of real-valued variables across time make
the logic undecidable. We present a toolkit which can
integrate with existing AMS simulators for checking AMS
assertions on practical designs. We study the problem
of synchronizing the AMS simulator with the AMS
assertion checker and demonstrate the performance
penalty of different synchronization options.",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2012:RSP,
author = "Hai Lin and Yunsi Fei",
title = "Resource Sharing of Pipelined Custom Hardware
Extension for Energy-Efficient Application-Specific
Instruction Set Processor Design",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "39:1--39:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348843",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Application-Specific Instruction set Processor (ASIP)
has become an increasingly popular platform for
embedded systems because of its high performance,
flexibility, and short turn-around time. The hardware
extension in ASIPs can speed-up program execution.
However, it also incurs area overhead and extra static
energy consumption. Traditional datapath merging
techniques reduce the circuit overhead by reusing
hardware modules for executing multiple operations.
However, they introduce structural hazard for multiple
custom instructions in sequence, and hence reduce the
performance improvement. In this article, we introduce
a pipelined configurable structure for the hardware
extension in ASIPs, so that structural hazards can be
remedied. With multiple subgraphs of operations
selected, we design a novel operation-to-hardware
mapping algorithm based on Integer Linear Programming
(ILP) to automatically construct a resource-efficient
pipelined configurable functional unit. Different
resource sharing schemes would affect both the hardware
overhead and the overall performance improvement. We
analyze the design trade-offs between resource
efficiency and performance improvement. At the end, we
present our design space exploration results by setting
the optimization objective to area, area and delay, and
delay respectively.",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2012:HSC,
author = "Hai Lin and Tiansi Hu and Yunsi Fei",
title = "A Hardware\slash Software Cooperative Custom Register
Binding Approach for Register Spill Elimination in
Application-Specific Instruction Set Processors",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "40:1--40:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348844",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Application-Specific Instruction set Processor (ASIP)
has become an important design choice for embedded
systems. It can achieve both high flexibility offered
by the base processor core and high performance and
energy efficiency offered by the dedicated hardware
extensions. Although a lot of efforts have been devoted
to computation acceleration, for example, automatic
custom instruction identification and synthesis,
limited on-chip data storage elements including the
register file and data cache have become a potential
performance bottleneck. For custom instructions that
have more inputs and/or outputs than the generic
register file I/O ports, custom registers are added in
ASIPs to satisfy the need of additional inputs and
outputs, and traditionally they are used only by custom
instructions. In this article, we propose a
hardware/software cooperative approach with a linear
scan register allocation algorithm, which allows base
instructions to utilize the existing custom registers
in ASIPs for eliminating register spills of the
program. The data traffic between the base processor
and off-chip memory can be replaced with
energy-efficient on-chip communications between the
processor core and custom hardware extensions. Our
experimental results demonstrate that a significant
performance gain can be achieved, orthogonal to
improvements by other techniques in ASIP design.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2012:BOD,
author = "An-Ping Wang and Jiwon Hahn and Mahshid Roumi and Pai
H. Chou",
title = "Buffer Optimization and Dispatching Scheme for
Embedded Systems with Behavioral Transparency",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "41:1--41:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348845",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents a buffer minimization scheme
with low dispatching overhead for embedded software
processes. To accomplish this, we exploit behavioral
transparency in the model of computation. In such a
model (e.g., synchronous dataflow), the state of buffer
requirements is determined completely by the firing
sequence of the actors without requiring functional
simulation of the actors. Fine-grained buffer
allocation incurs high code pointer overhead while
coarse-grained allocation suffers from memory
fragmentation. Instead, we propose a medium-grained,
``access-contiguous'' buffer allocation scheme that
minimizes the total buffer space and pointer overhead.
We formulate the buffer allocation problem as 2D tiles
that represent the lifetime of the buffers to minimize
their memory occupation spatially and temporally.
Experimental results show that our scheme uses less
data memory than existing techniques by 26\% on
average, or up to 57\% in the best case. Our technique
retains code modularity for dynamic configuration and,
more importantly, enables many more applications that
otherwise would not fit if implemented using previous
state-of-the-art techniques.",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gately:2012:AJO,
author = "Matthew B. Gately and Mark B. Yeary and Choon Yik
Tang",
title = "An Algorithm for Jointly Optimizing Quantization and
Multiple Constant Multiplication",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "42:1--42:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348846",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents a joint framework for
quantization and Multiple Constant Multiplication (MCM)
optimization, which yields a computationally efficient
implementation of multiplierless multiplication in
hardware and software. Frameworks of this nature have
been developed in the context of Finite Impulse
Response (FIR) filters, where frequency response
specifications are used to drive the design. In this
work, we look at a general case, considering as given a
vector of ideal, real constants, which may come from
any application and do not necessarily represent FIR
filter coefficients. We first formulate a joint
optimization problem for finding a fixed-point vector
and a shift-add network that are optimal in terms of
quantization error and MCM complexity. We then describe
ways to finitize and prune the search space, leading to
an efficient algorithm called JOINT\_SOLVE that solves
the problem. Finally, via extensive randomized
experiments, we show that our joint framework is
notably more computationally efficient than a
disjointed one, reducing the MCM cost by 15\%--60\% on
moderate size problems.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2012:SAH,
author = "Yonghwan Kim and Sanghoon Kwak and Taewhan Kim",
title = "Synthesis of Adaptable Hybrid Adders for Area
Optimization under Timing Constraint",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "43:1--43:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348847",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Satisfying the timing constraint is the utmost concern
in the integrated circuit design and it is true that
most critical timing paths in a circuit cover one or
more arithmetic components such as adder, subtractor,
and multiplier of which addition logic is commonly
involved. This work addresses the problem of
redesigning the addition logic (in a form of hybrid
adder) on a critical timing path to meet the timing
constraint while minimally allocating the required
addition logic. Unlike the conventional hybrid adder
design schemes in which they assume uniform or specific
patterns of input signal arrival times and minimize the
latest timing of the output signals, our work extracts
the required timing of each output signal as well as
the input arrival times directly from the circuit and
resynthesizes the addition logic by creating a
customized hybrid adder that is best suited, in terms
of logic area, for meeting the timing constraint of the
circuit. Specifically, we propose a systematic approach
of hybrid adder design exploration, basically following
the principle of dynamic programming with
well-controlled pruning techniques. This work is
realistic and practically very useful in that it can be
used as a timing optimizer to the computation-intensive
circuits with a tight timing budget. We provide a set
of diverse experimental data to show how much the
proposed hybrid adder scheme is effective in meeting or
reducing timing while maintaining the circuit area as
minimal as possible.",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Backes:2012:SCD,
author = "John D. Backes and Marc D. Riedel",
title = "The Synthesis of Cyclic Dependencies with {Boolean}
Satisfiability",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "44:1--44:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348848",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The accepted wisdom is that combinational circuits
must have acyclic (i.e., feed-forward) topologies. Yet
simple examples suggest that this is incorrect. In
fact, introducing cycles (i.e., feedback) into
combinational designs can lead to significant savings
in area and in delay. Prior work described
methodologies for synthesizing cyclic circuits with
Sum-Of-Product (SOP) and Binary-Decision Diagram
(BDD)-based formulations. Recently, techniques for
analyzing and mapping cyclic circuits based on Boolean
satisfiability (SAT) were proposed. This article
presents a SAT-based methodology for synthesizing
cyclic dependencies. The strategy is to generate cyclic
functional dependencies through a technique called
Craig interpolation. Given a choice of different
functional dependencies, a branch-and-bound search is
performed to pick the best one. Experiments on
benchmark circuits demonstrate the effectiveness of the
approach.",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bild:2012:SNR,
author = "David R. Bild and Robert P. Dick and Gregory E. Bok",
title = "Static {NBTI} Reduction Using Internal Node Control",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "45:1--45:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348849",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Negative Bias Temperature Instability (NBTI) is a
significant reliability concern for nanoscale CMOS
circuits. Its effects on circuit timing can be
especially pronounced for circuits with standby-mode
equipped functional units, because these units can be
subjected to static NBTI stress for extended periods of
time. This article describes Internal Node Control
(INC), in which the inputs to some individual gates are
directly manipulated to prevent this static NBTI
fatigue. We prove that the INC selection problem is
NP-complete and present a linear-time heuristic that
can quickly determine near-optimal placements. This
near-optimality is confirmed by comparing results for
small benchmarks against optimal solutions from a mixed
integer linear programming formulation of our problem.
We evaluate the heuristic on the ISCAS85 benchmarks and
the Synopsys DesignWare Library. Our heuristic reduces
static NBTI-induced delay over a ten year period by
30--60\% and can reduce total path delay by an average
9.4\% when NBTI degradation is severe. The INC
placements and sleep signal routing require only a
1.6\% increase in area.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2012:CDA,
author = "Nai-Wen Chang and Tzu-Yin Lin and Sun-Yuan Hsieh",
title = "Conditional Diagnosability of $k$-Ary $n$-Cubes under
the {PMC} Model",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "46:1--46:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348850",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Processor fault diagnosis plays an important role in
measuring the reliability of multiprocessor systems and
the diagnosis of many well-known interconnection
networks. The conditional diagnosability, which is more
general than the classical diagnosability, is to
measure the diagnosability of a multiprocessor system
under the assumption that all of the neighbors of any
node in the system cannot fail at the same time. This
study shows that the conditional diagnosability for
$k$-ary $n$-cubes under the PMC model is $ 8 n - 7$ for
$ k \geq 4$ and $ n \geq 4$.",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mondal:2012:SEP,
author = "Arijit Mondal and P. P. Chakrabarti and Pallab
Dasgupta",
title = "Symbolic-Event-Propagation-Based Minimal Test Set
Generation for Robust Path Delay Faults",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "47:1--47:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348851",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present a symbolic-event-propagation-based scheme
to generate hazard-free tests for robust path delay
faults. This approach identifies all robustly testable
paths in a circuit and the corresponding complete set
of test vectors. We address the problem of finding a
minimal set of test vectors that covers all robustly
testable paths. We propose greedy and
simulated-annealing-based algorithms to find the same.
Results on ISCAS89 benchmark circuits show a
considerable reduction in test vectors for covering all
robustly testable paths.",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2012:LST,
author = "Shianling Wu and Laung-Terng Wang and Xiaoqing Wen and
Wen-Ben Jone and Michael S. Hsiao and Fangfang Li and
James Chien-Mo Li and Jiun-Lang Huang",
title = "Launch-on-Shift Test Generation for Testing Scan
Designs Containing Synchronous and Asynchronous Clock
Domains",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "48:1--48:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348852",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents a hybrid Automatic Test Pattern
Generation (ATPG) technique using the staggered
Launch-On-Shift (LOS) scheme followed by the one-hot
launch-on-shift scheme for testing delay faults in a
scan design containing asynchronous clock domains.
Typically, the staggered scheme produces small test
sets but needs long ATPG runtime, whereas the one-hot
scheme takes short ATPG runtime but yields large test
sets. The proposed hybrid technique is intended to
reduce test pattern count with acceptable ATPG runtime
for multimillion-gate scan designs. In case the scan
design contains multiple synchronous clock domains,
each group of synchronous clock domains is treated as a
clock group and tested using a launch-aligned or a
capture-aligned LOS scheme. By combining these schemes
together, we found the pattern counts for two large
industrial designs were reduced by approximately 1.6X
to 1.8X, while the ATPG runtime was increased by 40\%
to 50\%, when compared to the one-hot clocking scheme
alone.",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Khatib:2012:MRP,
author = "Mohammed G. Khatib",
title = "Migration-Resistant Policies for Probe-Wear Leveling
in {MEMS} Storage Devices",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "49:1--49:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348853",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Probes (read/write heads) in a MEMS storage device are
susceptible to wear. We study probe wear, and analyze
the causes of uneven wear. We show that under
real-world workloads some probes can wear one order of
magnitude faster than others. This premature expiry has
severe consequences for reliability, timing
performance, energy efficiency, and lifetime. Wear
leveling precludes premature expiry and is thus
necessary. We discuss the fundamental differences
between probe wear in MEMS storage and medium wear in
Flash, calling for a different treatment. We devise
three policies to level probe wear. The policies
provide a spectrum between best lifetime and least
influence on the response time and energy efficiency of
a MEMS storage device. We make the case that data
migration can be prevented by augmenting the policies
with a simple rule. We study the influence of the data
layout configuration on the leveling performance of the
policies.",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lam:2012:EPL,
author = "Tak-Kei Lam and Wai-Chung Tang and Xiaoqing Yang and
Yu-Liang Wu",
title = "{ECR}: a Powerful and Low-Complexity Error
Cancellation Rewiring Scheme",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "50:1--50:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348854",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Rewiring is known to be a class of logic restructuring
technique that is at least equally powerful in
flexibility compared to other logic transformation
techniques. Especially it is wiring sensitive and is
particularly useful for interconnect-based circuit
synthesis processes. One of the most well-studied
rewiring techniques is the ATPG-based Redundancy
Addition and Removal (RAR) technique which adds a
redundant alternative wire to make an originally
irredundant target wire become redundant and thus
removable. In this article, we propose a new
Error-Cancellation-based Rewiring scheme (ECR) which
can also identify non-RAR-based rewiring operations
with high efficiency. In ECR scheme, it is not
necessary for alternative wires to be redundant. Based
on the notion of error cancellation, we analyze and
reformulate the rewiring problem, and a more
generalized rewiring scheme is developed to detect more
rewiring cases which are not obtainable by existing
schemes while it still maintains a low runtime
complexity. Comparing with the most recent non-RAR
rewiring tool IRRA, the total number of alternative
wires found by our approach is about doubled (202\%)
while the CPU time used is just slightly more (8\%)
upon benchmarks preoptimized by ABC's rewriting. Our
experimental results also suggest that the ECR engine
is more powerful than IRRA in FPGA technology
mapping.",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Shen:2012:FSF,
author = "Ruijing Shen and Sheldon X.-D. Tan and Hai Wang and
Jinjun Xiong",
title = "Fast Statistical Full-Chip Leakage Analysis for
Nanometer {VLSI} Systems",
journal = j-TODAES,
volume = "17",
number = "4",
pages = "51:1--51:??",
month = oct,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2348839.2348855",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 22 10:59:18 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we present a new full-chip
statistical leakage estimation considering the spatial
correlation condition (strong or weak). The new
algorithm can deliver linear time, $ O(N) $, time
complexity, where $N$ is the number of grids on chip. The
proposed algorithm adopts a set of uncorrelated virtual
variables over grid cells to represent the original
physical random variables and the cell size is
determined by the spatial correlation length. In this
way, each physical variable is always represented by
virtual variables locally. We prove the number of
neighbor cells for each grid cell is not related to the
condition of spatial correlation (from no correlation
to 100\% correlated), which leads to linear time
complexity in terms of number of gates. We compute the
gate leakage by the orthogonal polynomials-based
collocation method. The total leakage of a whole chip
can be computed by simply summing up the coefficients
of corresponding orthogonal polynomials in each grid
cell. Furthermore, we develop a look-up table to cache
statistical information for each type of gate instead
of calculating leakage for every single instance of
gate on a chip. As a result, a new statistical leakage
characterization in Standard Cell Library (SCL) is put
forward. Furthermore, an incremental analysis algorithm
is proposed to update the chip-level statistical
leakage information efficiently after a few changes are
made. The proposed method has no restrictions on static
leakage models, or types of leakage distributions. The
large circuit examples in 45nm CMOS process demonstrate
the proposed algorithm is 1000X faster than a recently
proposed grid-based method with similar accuracy and
many orders of magnitude times speedup over the Monte
Carlo method. Experimental results also show the
incremental analysis provides about 10X further
speedup. We expect the incremental analysis could
achieve more speedup over the full leakage analysis for
larger problem sizes.",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Coskun:2012:ISS,
author = "Ayse Kivilcim Coskun and Yung-Hsiang Lu and Qinru
Qiu",
title = "Introduction to the special section on adaptive power
management for energy and temperature-aware computing
systems",
journal = j-TODAES,
volume = "18",
number = "1",
pages = "1:1--1:??",
month = dec,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2390191.2390192",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jan 12 08:32:04 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lari:2012:HPM,
author = "Vahid Lari and Shravan Muddasani and Srinivas Boppu
and Frank Hannig and Moritz Schmid and J{\"u}rgen
Teich",
title = "Hierarchical power management for adaptive
tightly-coupled processor arrays",
journal = j-TODAES,
volume = "18",
number = "1",
pages = "2:1--2:??",
month = dec,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2390191.2390193",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jan 12 08:32:04 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present a self-adaptive hierarchical power
management technique for massively parallel processor
architectures, supporting a new resource-aware parallel
computing paradigm called invasive computing. Here, an
application can dynamically claim, execute, and release
the resources in three phases: resource acquisition
(invade), program loading/configuration and execution
(infect), and release (retreat). Resource invasion is
governed by dedicated decentralized hardware
controllers, called invasion controllers (i-ctrls),
which are integrated into each processing element (PE).
Several invasion strategies for claiming linearly
connected or rectangular regions of processing
resources are implemented. The key idea is to exploit
the decentralized resource management inherent to
invasive computing for power savings by enabling
applications themselves to control the power for
processing resources and invasion controllers using a
hierarchical power-gating approach. We propose
analytical models for estimating various components of
energy consumption for faster design space exploration
and compare them with the results obtained from a
cycle-accurate C++ simulator of the processor array. In
order to find optimal design trade-offs, various
parameters like (a) energy consumption, (b) hardware
cost, and (c) timing overheads are compared for
different sizes of power domains. Experimental results
show significant energy savings (up to 73\%) for
selected characteristical algorithms and different
resource utilizations. In addition, we demonstrate the
accuracy of our proposed analytical model. Here,
estimation errors less than 3.6\% can be reported.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Srivastav:2012:DEE,
author = "Meeta Srivastav and M. B. Henry and Leyla Nazhandali",
title = "Design of energy-efficient, adaptable throughput
systems at near\slash sub-threshold voltage",
journal = j-TODAES,
volume = "18",
number = "1",
pages = "3:1--3:??",
month = dec,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2390191.2390194",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jan 12 08:32:04 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Voltage scaling has been a prevalent method of saving
energy for energy-constrained applications. However,
current technology trends which shrink transistor
sizes exacerbate process variation effects in
voltage-scaled systems. Large variations in transistor
parameters result in high variation in performance and
power across the chip. These effects, if ignored at the
design stage, will result in unpredictable behavior
when deployed in the field. In this article, we
leverage the benefits of voltage scaling methodology
for obtaining energy efficiency and compensate for the
loss in throughput by exploiting parallelism present in
the various DSP designs. We show that such a hybrid
method consumes 8\%--77\% less power, compared to
simple dynamic voltage scaling over different
throughputs. We study this system architecture in two
different workload environments: static and dynamic. We
show that to achieve the highest level of energy
efficiency, the number of cores and the operating
voltages vary widely between a BASE design versus a
process variation-aware (PVA) design. We further
demonstrate that the PVA design enjoys an average of
26.9\% and 51.1\% reduction in energy consumption for
the static and dynamic designs, respectively. Since
different cores will have a wide range of speeds at
operating voltages close to near/sub-thresholds due to
process variation, we gather characteristic behavior of
each core. With knowledge of the core speeds, we can
further increase the energy efficiency. Furthermore, in
this article, we show that use of this methodology will be
49.3\% more energy efficient, compared to building
the system with no knowledge about the characteristics
of each core.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sun:2012:STD,
author = "Jin Sun and Rui Zheng and Jyothi Velamala and Yu Cao
and Roman Lysecky and Karthik Shankar and Janet
Roveda",
title = "A self-tuning design methodology for power-efficient
multi-core systems",
journal = j-TODAES,
volume = "18",
number = "1",
pages = "4:1--4:??",
month = dec,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2390191.2390195",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jan 12 08:32:04 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article aims to achieve computational reliability
and energy efficiency through codevelopment of
algorithms, device, and circuit designs for
application-specific, reconfigurable architectures. The
new methodology characterizes aging-switching activity
and aging-supply voltage relationships that are
applicable for minimizing power consumption and task
execution efficiency in order to achieve low bit energy
ratio (BER). In addition, a new dynamic management
algorithm (DMA) is proposed to alleviate device
degradation and to extend system lifespan. In contrast
to traditional workload balancing schemes in which
cores are regarded as homogeneous, the new algorithm
ranks cores as ``highly competitive,'' ``less
competitive,'' and ``not competitive'' according to
their various competitiveness. Core competitiveness is
evaluated based upon their reliability, temperature,
and timing requirements. Consequently, ``competitive''
cores will take charge of the majority of the tasks at
relatively high voltage/frequency without violating
power and timing budgets, while ``not competitive''
cores will have light workloads to ensure their
reliability. The new approach combines intrinsic device
characteristics (aging-switching activity and
aging-supply voltage curves) into an integrated
framework to achieve high reliability and low energy
level with graceful degradation of system performance.
Experimental results show that the proposed method has
achieved up to 20\% power reduction, with about 4\%
performance degradation (in terms of accomplished
workload and system throughput), compared with
traditional workload balancing methods. The new method
also improves system mean-time-to-failure (MTTF) by up
to 25\%.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Rodrigues:2012:IPP,
author = "Rance Rodrigues and Arunachalam Annamalai and Israel
Koren and Sandip Kundu",
title = "Improving performance per watt of asymmetric
multi-core processors via online program phase
classification and adaptive core morphing",
journal = j-TODAES,
volume = "18",
number = "1",
pages = "5:1--5:??",
month = dec,
year = "2012",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2390191.2390196",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jan 12 08:32:04 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Asymmetric multi-core processors (AMPs) have been
shown to outperform symmetric ones in terms of
performance and performance/watt. Improved performance
and power efficiency are achieved when the program
threads are matched to their most suitable cores. Since
the computational needs of a program may change during
its execution, the best thread to core assignment will
likely change with time. We have, therefore, developed
an online program phase classification scheme that
allows the swapping of threads when the current needs
of the threads justify a change in the assignment. The
architectural differences among the cores in an AMP can
never match the diversity that exists among different
programs and even between different phases of the same
program. Consider, for example, a program (or a program
phase) that has a high instruction-level parallelism
(ILP) and will exhibit high power efficiency if
executed on a powerful core. We cannot, however,
include such powerful cores in the designed AMP, since
they will remain underutilized most of the time, and
they are not power efficient when the programs do not
exhibit a high degree of ILP. Thus, we must expect to
see program phases where the designed cores will be
unable to support the ILP that the program can exhibit.
We, therefore, propose in this article a dynamic
morphing scheme. This scheme will allow a core to gain
control of a functional unit that is ordinarily under
the control of a neighboring core during periods of
intense computation with high ILP. This way, we
dynamically adjust the hardware resources to the
current needs of the application. Our results show that
combining online phase classification and dynamic core
morphing can significantly improve the performance/watt
of most multithreaded workloads.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zanini:2012:OTC,
  author =       "Francesco Zanini and David Atienza and Colin N. Jones
                 and Luca Benini and Giovanni {De Micheli}",
  title =        "Online thermal control methods for multiprocessor
                 systems",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390197",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With technological advances, the number of cores
                 integrated on a chip is increasing. This in turn is
                 leading to thermal constraints and thermal design
                 challenges. Temperature gradients and hotspots not only
                 affect the performance of the system but also lead to
                 unreliable circuit operation and affect the lifetime of
                 the chip. Meeting temperature constraints and reducing
                 hotspots are critical for achieving reliable and
                 efficient operation of complex multi-core systems. In
                 this article, we analyze the use of four of the most
                 promising families of online control techniques for
                 thermal management of multiprocessors system-on-chip
                 (MPSoC). In particular, in our exploration, we aim at
                 achieving an online smooth thermal control action that
                 minimizes the performance loss as well as the
                 computational and hardware overhead of embedding a
                 thermal management system inside the MPSoC. The
                 definition of the optimization problem to tackle in
                 this work considers the thermal profile of the system,
                 its evolution over time, and current time-varying
                 workload requirements. Thus, this problem is formulated
                 as a finite-horizon optimal control problem, and we
                 analyze the control features of different online
                 thermal control approaches. In addition, we implemented
                 the policies on an MPSoC hardware simulation platform
                 and performed experiments on a cycle-accurate model of
                 the eight-core Niagara multi-core architecture using
                 benchmarks ranging from Web-accessing to playing
                 multimedia. Results show different trade-offs among the
                 analyzed techniques regarding the thermal profile, the
                 frequency setting, the power consumption, and the
                 implementation complexity.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cochran:2012:TPA,
  author =       "Ryan Cochran and Sherief Reda",
  title =        "Thermal prediction and adaptive control through
                 workload phase detection",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "7:1--7:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390198",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Elevated die temperature is a true limiter to the
                 scalability of modern processors. With continued
                 technology scaling in order to meet ever-increasing
                 performance demands, it is no longer cost effective to
                 design cooling systems that handle the worst-case
                 thermal behaviors. Instead, cooling systems are
                 designed to handle typical chip operation, while
                 processors must detect and handle rare thermal
                 emergencies. Most processors rely on measurements from
                 integrated thermal sensors and dynamic thermal
                 management (DTM) techniques in order to manage the
                 trade-off between performance and thermal risk. Optimal
                 management requires advanced knowledge of the thermal
                 trajectory based on the current workload behaviors and
                 operating conditions. In this work, we devise novel
                 workload phase classification strategies that
                 automatically discriminate among workload behaviors
                 with respect to the thermal control response. We
                 incorporate workload phase-detection and thermal models
                 into a dynamic voltage and frequency scaling (DVFS)
                 technique that can optimally control temperature during
                 runtime based on thermal predictions. We demonstrate
                 the effectiveness of our proposed techniques in
                 predicting and adaptively controlling the thermal
                 behavior of a real quad-core processor in response to a
                 wide range of workloads. In comparison with
                 state-of-the-art model predictive control (MPC)
                 techniques in previous works on thermal prediction, we
                 demonstrate a 5.8\% improvement in instruction
                 throughput with the same number of thermal violations.
                 In comparison with simple proportional-integral (PI)
                 feedback control techniques, we improve instruction
                 throughput by 3.9\%, while significantly reducing the
                 number of thermal violations.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Shi:2012:HND,
  author =       "Liang Shi and Jianhua Li and Chun Jason Xue and Xuehai
                 Zhou",
  title =        "Hybrid nonvolatile disk cache for energy-efficient and
                 high-performance systems",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "8:1--8:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390199",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "NAND flash memory has been employed as disk cache in
                 recent years. It has the advantages of high
                 performance, low leakage power, and cost efficiency.
                 However, flash memory's performance is limited by the
                 inability of in-place updates, coarse access
                 granularity, and a limited number of write/erase times.
                 In this article, we propose a hybrid nonvolatile disk
                 cache architecture for high-performance and
                 energy-efficient systems, where the disk cache is
                 implemented with a small-size phase change memory (PCM)
                 and a large-size NAND flash memory. Compared with
                 current flash memory-based disk cache, it has the
                 following advantages. (1) System performance is
                 improved as requests are carefully directed between PCM
                 and flash memory; (2) the energy consumption of disk
                 cache is substantially reduced with significant
                 reduction of additional operations, such as garbage
                 collections; (3) the efficiency of flash memory is
                 improved with the reduction of write activities on
                 flash memory; and (4) lifetime of NAND flash memory is
                 increased with most of the write operations assigned to
                 PCM, where PCM's lifetime is guaranteed to be longer
                 than the lifetime of flash memory. Simulation results
                 show that the proposed methods can substantially
                 improve the system performance, energy consumption, and
                 lifetime of the hybrid disk cache.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Singh:2012:ATA,
  author =       "Amit Kumar Singh and Akash Kumar and Thambipillai
                 Srikanthan",
  title =        "Accelerating throughput-aware runtime mapping for
                 heterogeneous {MPSoCs}",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "9:1--9:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390200",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Modern embedded systems need to support multiple
                 time-constrained multimedia applications that often
                 employ multiprocessor-systems-on-chip (MPSoCs). Such
                 systems need to be optimized for resource usage and
                 energy consumption. It is well understood that a
                 design-time approach cannot provide timing guarantees
                 for all the applications due to its inability to cater
                 for dynamism in applications. However, a runtime
                 approach consumes large computation requirements at
                 runtime and hence may not lend well to
                 constrained-aware mapping. In this article, we present
                 a hybrid approach for efficient mapping of applications
                 in such systems. For each application to be supported
                 in the system, the approach performs extensive
                 design-space exploration (DSE) at design time to derive
                 multiple design points representing throughput and
                 energy consumption at different resource combinations.
                 One of these points is selected at runtime efficiently,
                 depending upon the desired throughput while optimizing
                 for energy consumption and resource usage. While most
                 of the existing DSE strategies consider a fixed
                 multiprocessor platform architecture, our DSE considers
                 a generic architecture, making DSE results applicable
                 to any target platform. All the compute-intensive
                 analysis is performed during DSE, which leaves for
                 minimum computation at runtime. The approach is capable
                 of handling dynamism in applications by considering
                 their runtime aspects and providing timing guarantees.
                 The presented approach is used to carry out a DSE case
                 study for models of real-life multimedia applications:
                 H.263 decoder, H.263 encoder, MPEG-4 decoder, JPEG
                 decoder, sample rate converter, and MP3 decoder. At
                 runtime, the design points are used to map the
                 applications on a heterogeneous MPSoC. Experimental
                 results reveal that the proposed approach provides
                 faster DSE, better design points, and efficient runtime
                 mapping when compared to other approaches. In
                 particular, we show that DSE is faster by 83\% and
                 runtime mapping is accelerated by 93\% for some cases.
                 Further, we study the scalability of the approach by
                 considering applications with large numbers of tasks.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Saladi:2012:CAC,
  author =       "Kalyan Saladi and Harikumar Somakumar and Mahadevan
                 Ganapathi",
  title =        "Concurrency-aware compiler optimizations for hardware
                 description languages",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "10:1--10:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390201",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we discuss the application of
                 compiler technology for eliminating redundant
                 computation in hardware simulation. We discuss how
                 concurrency in hardware description languages (HDLs)
                 presents opportunities for expression reuse across
                 different threads. While accounting for discrete event
                 simulation semantics, we extend the data flow analysis
                 framework to concurrent threads. In this process, we
                 introduce a rewriting scheme named $ \partial $VF and a
                 graph representation to model sensitivity relationships
                 among threads. An algorithm for identifying common
                 subexpressions as applied to HDLs is presented. Related
                 issues, such as scheduling correctness, are also
                 considered.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xydis:2012:CLE,
  author =       "Sotirios Xydis and Kiamal Pekmestzi and Dimitrios
                 Soudris and George Economakos",
  title =        "Compiler-in-the-loop exploration during datapath
                 synthesis for higher quality delay-area trade-offs",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "11:1--11:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390202",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Design space exploration during high-level synthesis
                 targets the computation of those design solutions which
                 form optimal trade-off points. This quest for optimal
                 trade-offs has been focused on studying the impact of
                 various architectural-level parameters during
                 high-level synthesis algorithms, silently neglecting
                 the trade-offs produced from the combined impact of
                 behavioral-level together with architectural-level
                 parameters. We propose a novel design space
                 exploration methodology that studies an extended
                 instance of the solution space considering the effects
                 of combining compiler- and architectural-level
                 transformations. It is shown that exploring the design
                 space in a global manner reveals new trade-off points,
                 thus shifting towards higher quality design solutions.
                 We use a combination of upper-bounding conditions
                 together with gradient-based heuristic pruning to
                 efficiently traverse the extended search space. Our
                 exploration framework delivers significant quality
                 improvements without compromising the optimality
                 (Pareto accuracy) of the discovered solutions, together
                 with significant runtime reductions compared to
                 exploring exhaustively the solution space at every
                 allocation scenario.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kurimoto:2012:VWR,
  author =       "Masanori Kurimoto and Takeshi Yamamoto and Satoshi
                 Nakano and Atsuto Hanami and Hiroyuki Kondo",
  title =        "Verification work reduction methodology in low-power
                 chip implementation",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390203",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In order to achieve satisfactory verification for
                 complicated low-power demands in green products, we
                 propose a verification work reduction methodology. It
                 consists of three steps, namely virtual, direct actual,
                 and actual model simulations. Virtual low-power
                 simulation inserts low-power cells, such as isolators
                 or level shifters, virtually and simulates logical
                 behavior for design under test (DUT) based on
                 user-defined power mode. Direct actual low-power
                 simulation replaces behavior models without non-logical
                 pins for some of modules with actual models with
                 non-logical pins, which are Vdd and Gnd, and simulates
                 DUT in mixed level. Actual low-power simulation
                 simulates DUT by using actual models with non-logical
                 pins for all cells and hard macros. We introduce
                 techniques which classify the type of the bugs on which
                 we focus at each verification step and prevent the
                 concerned bugs from leaking to the latter verification
                 step as much as possible. We applied our methodology to
                 an actual chip and could reduce the total simulation
                 period until tape-out by 38.8\% and the total chip
                 development period by 10\%, compared with the
                 conventional methodology.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jing:2012:SFE,
  author =       "Naifeng Jing and Ju-Yueh Lee and Zhe Feng and Weifeng
                 He and Zhigang Mao and Lei He",
  title =        "{SEU} fault evaluation and characteristics for
                 {SRAM}-based {FPGA} architectures and synthesis
                 algorithms",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390204",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Reliability has become an increasingly important
                 concern for SRAM-based field programmable gate arrays
                 (FPGAs). Targeting SEU (single event upset) in
                 SRAM-based FPGAs, this article first develops an SEU
                 evaluation framework that can quantify the failure
                 sensitivity for each configuration bit during design
                 time. This framework considers detailed fault behavior
                 and logic masking on a post-layout FPGA application and
                 performs logic simulation on various circuit elements
                 for fault evaluation. Applying this framework on MCNC
                 benchmark circuits, we first characterize SEUs with
                 respect to different FPGA circuits and architectures,
                 for example, bidirectional routing and unidirectional
                 routing. We show that in both routing architectures,
                 interconnects not only contribute to the lion's share
                 of the SEU-induced functional failures, but also
                 present higher failure rates per configuration bits
                 than LUTs. Particularly, local interconnect
                 multiplexers in logic blocks have the highest failure
                 rate per configuration bit. Then, we evaluate three
                 recently proposed SEU mitigation algorithms, IPD, IPF,
                 and IPV, which are all logic resynthesis-based with
                 little or no overhead on placement and routing.
                 Different fault mitigating capabilities at the chip
                 level are revealed, and it demonstrates that algorithms
                 with explicit consideration for interconnect
                 significantly mitigate the SEU at the chip level, for
                 example, IPV achieves 61\% failure rate reduction on
                 average against IPF with about 15\%. In addition, the
                 combination of the three algorithms delivers over 70\%
                 failure rate reduction on average at the chip level.
                 The experiments also reveal that in order to improve
                 fault tolerance at the chip level, it is necessary for
                 future fault mitigation algorithms to concern not only
                 LUT or interconnect faults, but also their
                 interactions. We envision that our framework can be
                 used to cast more useful insights for more robust FPGA
                 circuits, architectures, and better synthesis
                 algorithms.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dworak:2012:UIC,
  author =       "Jennifer Dworak and Kundan Nepal and Nuno Alves and
                 Yiwen Shi and Nicholas Imbriglia and R. Iris Bahar",
  title =        "Using implications to choose tests through suspect
                 fault identification",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390205",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As circuits continue to scale to smaller feature
                 sizes, wearout and latent defects are expected to cause
                 an increasing number of errors in the field. Online
                 error detection techniques, including logic
                 implication-based checker hardware, are capable of
                 detecting at least some of these errors as they occur.
                 However, recovery may be expensive, and the underlying
                 problem may lead to multiple failures of a core over
                 time. In this article, we will investigate the
                 diagnostic capability of logic implications to identify
                 possible failure locations when an error is detected
                 online. We will then utilize this information to select
                 a highly efficient test set that can be used to
                 effectively test the identified suspect locations in
                 both the failing core and in other identical cores in
                 the system.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mok:2012:DSL,
  author =       "Santiago Mok and John Lee and Puneet Gupta",
  title =        "Discrete sizing for leakage power optimization in
                 physical design: a comparative study",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390206",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "While sizing has been studied for over three decades,
                 the absence of a common framework with which to compare
                 methods has made progress difficult to measure. In this
                 article, we compare popular sizing techniques in which
                 gates are chosen from a discrete standard cell library
                 and slew and interconnect effects are accounted for.
                 The difference between sizing methods reduces from
                 roughly 53\% to 8\% between best and worst case after
                 slew propagation is taken into account. In our
                 benchmarks, no one sizing technique consistently
                 outperforms the others.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2012:ECM,
  author =       "John Lee and Puneet Gupta",
  title =        "{ECO} cost measurement and incremental gate sizing for
                 late process changes",
  journal =      j-TODAES,
  volume =       "18",
  number =       "1",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2012",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2390191.2390207",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Jan 12 08:32:04 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Changes in the manufacturing process parameters may
                 create timing violations in a design, making it
                 necessary to perform an engineering change order (ECO)
                 to correct these problems. We present a framework for
                 performing incremental gate sizing for process changes
                 late in the design cycle, and a method for creating
                 initial designs that are robust to late process
                 changes. This includes a method for measuring and
                 estimating ECO cost and for transforming these costs
                 into linear programming optimization problems. In the
                 case of ECOs, the method reduces ECO costs on average,
                 by 89\% in changed area compared to a leading
                 commercial tool. Furthermore, the robust initial
                 designs are, on average, 55\% less likely to need
                 redesign in the future.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kornaros:2013:STC,
  author =       "Georgios Kornaros and Dionisios Pnevmatikatos",
  title =        "A survey and taxonomy of on-chip monitoring of
                 multicore systems-on-chip",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Billion transistor systems-on-chip increasingly
                 require dynamic management of their hardware components
                 and careful coordination of the tasks that they carry
                 out. Diverse real-time monitoring functions assist
                 towards this objective through the collection of
                 important system metrics, such as throughput of
                 processing elements, communication latency, or resource
                 utilization for each application. The online evaluation
                 of these metrics can result in localized or global
                 decisions that attempt to improve aspects of system
                 behavior, system performance, quality-of-service, power
                 and thermal effects under nominal conditions. This work
                 provides a comprehensive categorization of monitoring
                 approaches used in multiprocessor SoCs. As adaptive
                 systems are encountered in many disciplines, it is
                 imperative to present the prominent research efforts in
                 developing online monitoring methods. To this end we
                 offer a taxonomy that groups strongly related
                 techniques that designers increasingly use to produce
                 more efficient and adaptive chips. The provided
                 classification helps to understand and compare
                 architectural mechanisms that can be used in systems,
                 while one can envisage the innovations required to
                 build real adaptive and intelligent systems-on-chip.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Backasch:2013:RVM,
  author =       "Rico Backasch and Christian Hochberger and Alexander
                 Weiss and Martin Leucker and Richard Lasslop",
  title =        "Runtime verification for multicore {SoC} with
                 high-quality trace data",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multicore System-on-Chip (SoC) implementations of
                 embedded systems are becoming very popular. In these
                 systems it is possible to spread out computations over
                 many cores. On one hand this leads to better energy
                 efficiency if clock frequencies and core voltages are
                 reduced. On the other hand this delivers very high
                 performance to the software developer and thus enables
                 complex software systems to be implemented.
                 Unfortunately, debugging and validation of these
                 systems becomes extremely difficult. Various
                 technological approaches try to solve this dilemma. In
                 this contribution we will show a new approach to
                 observe multi-core SoCs and make their internal
                 operations visible to external analysis tools. Also, we
                 show that runtime verification can be employed to
                 analyze and validate these internal operations while
                 the system operates in its normal environment. The
                 combination of these two approaches delivers
                 unprecedented options to the developer to understand
                 and verify system behavior even in complex multicore
                 SoCs.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Costa:2013:CDO,
  author =       "Jos{\'e} C. Costa and Jos{\'e} C. Monteiro",
  title =        "Coverage-directed observability-based validation for
                 embedded software",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Motivated by the need for validation methodologies for
                 embedded systems we propose a method for embedded
                 software testing that can be integrated with existing
                 hardware methods. Existing coverage-directed validation
                 methods guarantee the execution of a certain percentage
                 of the program code under test. Yet they do not
                 generally verify whether the statements executed have
                 any influence on the program's output. In the proposed
                 method, a program statement is considered covered not
                 simply for belonging to the executed path, but only if
                 its execution has influence in some observable output.
                 The paths are generated by searching the longest path
                 in terms of the number of statements in the path. Given
                 that not all paths are valid, we check their
                 feasibility using a method based on Mixed Integer
                 Linear Programming (MILP). Variable aliasing is
                 accounted for by representing variables by their memory
                 addresses when building this MILP problem. In this
                 manner, for feasible paths, we obtain immediately the
                 input values that allow the execution of the path.
                 Using these inputs, we determine the statements
                 actually observed. We repeat this process until a
                 user-specified level of coverage has been achieved. In
                 the generation of each new path, the statement coverage
                 obtained so far and the feasibility of previous paths
                 is taken into account. We present results that
                 demonstrate the effectiveness of this methodology.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2013:DRG,
  author =       "Chun-An Chen and Sun-Yuan Hsieh",
  title =        "{$ t / t $}-Diagnosability of regular graphs under the
                 {PMC} model",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "A system is $ t / t $-diagnosable if, given any
                 collection of test results, the faulty nodes can be
                 isolated to within a set of at most $t$ nodes provided
                 that the number of faulty nodes does not exceed $t$.
                 Given an {$N$}-vertex graph {$G$} that is regular with
                 the common degree $d$ and has no cycle of three or four
                 vertices, this study shows that {$G$} is $ (2 d - 2) /
                 (2 d - 2) $-diagnosable if {$ N \geq 4 d - 30 > 0 $}.
                 Based on this result, the $ t / t $-diagnosabilities of
                 several classes of graphs can be computed
                 efficiently.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2013:SNC,
  author =       "Chen Huang and Bailey Miller and Frank Vahid and Tony
                 Givargis",
  title =        "Synthesis of networks of custom processing elements
                 for real-time physical system emulation",
  journal =      j-TODAES,
  volume =       "18",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2013",
  CODEN =        "ATASFO",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Apr 5 18:40:42 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Emulating a physical system in real-time or faster has
                 numerous applications in cyber-physical system design
                 and deployment. For example, testing of a
                 cyber-device's software (e.g., a medical ventilator)
                 can be done via interaction with a real-time digital
                 emulation of the target physical system (e.g., a
                 human's respiratory system). Physical system emulation
                 typically involves iteratively solving thousands of
                 ordinary differential equations (ODEs) that model the
                 physical system. We describe an approach that creates
                 custom processing elements (PEs) specialized to the
                 ODEs of a particular model while maintaining some
                 programmability, targeting implementation on
                 field-programmable gate arrays (FPGAs). We detail the
                 PE micro-architecture and accompanying automated
                 compilation and synthesis techniques. Furthermore, we
                 describe our efforts to use a high-level synthesis
                 approach that incorporates regularity extraction
                 techniques as an alternative FPGA-based solution, and
                 also describe an approach using graphics processing
                 units (GPUs). We perform experiments with five models:
                 a Weibel lung model, a Lutchen lung model, an atrial
                 heart model, a neuron model, and a wave model; each
                 model consists of several thousand ODEs and targets a
                 Xilinx Virtex 6 FPGA. Results of the experiments show
                 that the custom PE approach achieves 4X--9X speedups
                 (average 6.7X) versus our previous general ODE-solver
                 PE approach, and 7X--10X speedups (average 8.7X) versus
                 high-level synthesis, while using approximately the
                 same or fewer FPGA resources. Furthermore, the approach
                 achieves speedups of 18X--32X (average 26X) versus an
                 Nvidia GTX 460 GPU, and average speedups of more than
                 100X compared to a six-core TI DSP processor or a
                 four-core ARM processor, and 24X versus an Intel I7
                 quad core processor running at 3.06 GHz. While an FPGA
                 implementation costs about 3X--5X more than the non-FPGA
                 approaches, a speedup/dollar analysis shows 10X
                 improvement versus the next best approach, with the
                 trend of decreasing FPGA costs improving speedup/dollar
                 in the future.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% TODAES 18(2), article 22.  End page unknown (??); no DOI field recorded.
@article{Forte:2013:RAA,
  author          = {Domenic Forte and Ankur Srivastava},
  title           = {Resource-aware architectures for adaptive particle
                     filter based visual target tracking},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {22:1--22:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {There are a growing number of visual tracking
                     applications now being envisioned for mobile devices.
                     However, since computer vision algorithms such as
                     particle filtering have large computational demands,
                     they can result in high energy consumption and
                     temperatures in mobile devices. Conventional approaches
                     for distributed target tracking with a camera node and
                     a receiver node are either sender-based (SB) or
                     receiver-based (RB). The SB approach uses little energy
                     and bandwidth, but requires a sender with large
                     computational resources. The RB approach fits
                     applications where computational resources are
                     completely unavailable to the sender, but requires very
                     large energy and bandwidth. In this article, we propose
                     three architectures for distributed particle filtering
                     that (i) reduce particle filtering workload and (ii)
                     allow for dynamic migration of workload between nodes
                     participating in tracking. We also discuss an adaptive
                     particle filtering extension that adapts particle
                     filter computational complexity and can be applied to
                     both the conventional and proposed architectures for
                     improved energy efficiency. Results show that the
                     proposed solutions require low additional overhead,
                     improve on tracking system lifetime, balance node
                     temperatures, maintain track of the desired target, and
                     are more effective than conventional approaches in many
                     scenarios.},
  acknowledgement = ack-nhfb,
  articleno       = {22},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 23.  End page unknown (??); no DOI field recorded.
@article{Zhao:2013:SRE,
  author          = {Baoxian Zhao and Hakan Aydin and Dakai Zhu},
  title           = {Shared recovery for energy efficiency and reliability
                     enhancements in real-time applications with precedence
                     constraints},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {23:1--23:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {While Dynamic Voltage Scaling (DVS) remains as a
                     popular energy management technique for modern
                     computing systems, recent research has identified
                     significant and negative impacts of voltage scaling on
                     system reliability. To preserve system reliability
                     under DVS settings, a number of reliability-aware power
                     management (RA-PM) schemes have been recently studied.
                     However, the existing RA-PM schemes normally schedule a
                     separate recovery for each task whose execution is
                     scaled down and are rather conservative. To overcome
                     such conservativeness, we study in this article novel
                     RA-PM schemes based on the shared recovery (SHR)
                     technique. Specifically, we consider a set of
                     frame-based real-time tasks with individual deadlines
                     and a common period where the precedence constraints
                     are represented by a directed acyclic graph (DAG). We
                     first show that the earliest deadline first (EDF)
                     algorithm can always yield a schedule where all timing
                     and precedence constraints are met by considering the
                     effective deadlines of tasks derived from as late as
                     possible (ALAP) policy, provided that the task set is
                     feasible. Then, we propose a shared recovery based
                     frequency assignment technique (namely SHR-DAG) and
                     prove its optimality to minimize energy consumption
                     while preserving the system reliability. To exploit
                     additional slack that arises from early completion of
                     tasks, we also study a dynamic extension for SHR-DAG to
                     improve energy efficiency and system reliability at
                     runtime. The results from our extensive simulations
                     show that, compared to the existing RA-PM schemes,
                     SHR-DAG can achieve up to 35\% energy savings, which is
                     very close to the maximum achievable energy savings.
                     More interestingly, our extensive evaluation also
                     indicates that the new schemes offer non-trivial
                     improvements on system reliability over the existing
                     RA-PM schemes as well.},
  acknowledgement = ack-nhfb,
  articleno       = {23},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 24.  End page unknown (??); no DOI field recorded.
@article{Shen:2013:AAP,
  author          = {Hao Shen and Ying Tan and Jun Lu and Qing Wu and Qinru
                     Qiu},
  title           = {Achieving autonomous power management using
                     reinforcement learning},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {24:1--24:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {System level power management must consider the
                     uncertainty and variability that come from the
                     environment, the application and the hardware. A robust
                     power management technique must be able to learn the
                     optimal decision from past events and improve itself as
                     the environment changes. This article presents a novel
                     on-line power management technique based on model-free
                     constrained reinforcement learning (Q-learning). The
                     proposed learning algorithm requires no prior
                     information of the workload and dynamically adapts to
                     the environment to achieve autonomous power management.
                     We focus on the power management of the peripheral
                     device and the microprocessor, two of the basic
                     components of a computer. Due to their different
                     operating behaviors and performance considerations,
                     these two types of devices require different designs of
                     Q-learning agent. The article discusses system modeling
                     and cost function construction for both types of
                     Q-learning agent. Enhancement techniques are also
                     proposed to speed up the convergence and better
                     maintain the required performance (or power) constraint
                     in a dynamic system with large variations. Compared
                     with the existing machine learning based power
                     management techniques, the Q-learning based power
                     management is more flexible in adapting to different
                     workload and hardware and provides a wider range of
                     power-performance tradeoff.},
  acknowledgement = ack-nhfb,
  articleno       = {24},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 25.  End page unknown (??); no DOI field recorded.
@article{Lee:2013:RIB,
  author          = {Jongwon Lee and Jonghee M. Youn and Doosan Cho and
                     Yunheung Paek},
  title           = {Reducing instruction bit-width for low-power {VLIW}
                     architectures},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {25:1--25:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {VLIW (very long instruction word) architectures have
                     proven to be useful for embedded applications with
                     abundant instruction level parallelism. But due to the
                     long instruction bus width it often consumes more power
                     and memory space than necessary. One way to lessen this
                     problem is to adopt a reduced bit-width instruction set
                     architecture (ISA) that has a narrower instruction word
                     length. This facilitates a more efficient hardware
                     implementation in terms of area and power by decreasing
                     bus-bandwidth requirements and the power dissipation
                     associated with instruction fetches. In practice,
                     however, it is impossible to convert a given ISA fully
                     into an equivalent reduced bit-width one because the
                     narrow instruction word, due to bit-width restrictions,
                     can encode only a small subset of normal instructions
                     in the original ISA. Consequently, existing processors
                     provide narrow instructions in very limited cases along
                     with severe restrictions on register accessibility. The
                     objective of this work is to explore the possibility of
                     complete conversion, as a case study, of an existing
                     32-bit VLIW ISA into a 16-bit one that supports
                     effectively all 32-bit instructions. To this objective,
                     we attempt to circumvent the bit-width restrictions by
                     dynamically extending the effective instruction word
                     length of the converted 16-bit operations. Further, we
                     will show that our proposed ISA conversion can create a
                     synergy effect with a VLES (variable length execution
                     set) architecture that is adopted in most recent VLIW
                     processors. According to our experiment, the code size
                     becomes significantly smaller after the conversion to
                     16-bit VLIW code. Also at a slight run time cost, the
                     machine with the 16-bit ISA consumes much less energy
                     than the original machine.},
  acknowledgement = ack-nhfb,
  articleno       = {25},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 26.  End page unknown (??); no DOI field recorded.
@article{Majzoobi:2013:LPR,
  author          = {Mehrdad Majzoobi and Joonho Kong and Farinaz
                     Koushanfar},
  title           = {Low-power resource binding by postsilicon
                     customization},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {26:1--26:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article proposes the first postsilicon
                     customization method for resource binding to achieve
                     power reduction application specific integrated
                     circuits (ASICs) design. Instead of committing to one
                     configuration of resource binding during synthesis, our
                     new synthesis method produces a diverse set of
                     candidate bindings for the design. To ensure diversity
                     of the resource usage patterns, we introduce a binding
                     candidate formation method based on the orthogonal
                     arrays. Additional control components are added to
                     enable post manufacturing selection of one of the
                     binding candidates. The resource binding candidate that
                     minimizes the power consumption is selected by
                     considering the specific power characteristics of each
                     chip. An efficient methodology for embedding several
                     binding candidates in one design is developed.
                     Evaluations on benchmark designs show the low overhead
                     and the effectiveness of the proposed methods. As an
                     example, applying our method results in an average of
                     14.2\% (up to 24.0\%) power savings on benchmark
                     circuits for a variation model in 45nm CMOS technology.
                     The power efficiency of our customized postsilicon
                     binding is expected to improve with scaling of the
                     technology and the likely resulting higher process
                     variations.},
  acknowledgement = ack-nhfb,
  articleno       = {26},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 27.  End page unknown (??); no DOI field recorded.
@article{Huang:2013:LPA,
  author          = {Shih-Hsu Huang and Wen-Pin Tu and Chia-Ming Chang and
                     Song-Bin Pan},
  title           = {Low-power anti-aging zero skew clock gating},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {27:1--27:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In advanced CMOS technology, the NBTI (negative bias
                     temperature instability) effect results in delay
                     degradations of PMOS transistors. Further, because of
                     clock gating, PMOS transistors in a clock tree often
                     have different active probabilities, leading to
                     different delay degradations. If the degradation
                     difference is not properly controlled, this clock skew
                     may cause the circuit fails to function at some point
                     later in time. Intuitively, the degradation difference
                     can be eliminated, if we increase the active
                     probability of the low-probability clock gates to
                     ensure the clock gates at the same level always having
                     the same active probability. However, this intuitive
                     method may suffer from large power consumption
                     overhead. In this article, we point out, by carefully
                     planning the transistor-level clock signal propagation
                     path, we can have many clock gates whose active
                     probabilities do not affect the degradation difference.
                     Based on that observation, we propose a
                     critical-PMOS-aware clock tree design methodology to
                     eliminate the degradation difference with minimum power
                     consumption overhead. Benchmark data consistently show
                     our approach achieves very good results in terms of
                     both the NBTI-induced clock skew (i.e., the degradation
                     difference) and the power consumption overhead.},
  acknowledgement = ack-nhfb,
  articleno       = {27},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 28.  End page unknown (??); no DOI field recorded.
@article{Wang:2013:CTM,
  author          = {Hai Wang and Sheldon X.-D. Tan and Duo Li and Ashish
                     Gupta and Yuan Yuan},
  title           = {Composable thermal modeling and simulation for
                     architecture-level thermal designs of multicore
                     microprocessors},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {28:1--28:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Efficient temperature estimation is vital for
                     designing thermally efficient, lower power and robust
                     integrated circuits in nanometer regime. Thermal
                     simulation based on the detailed thermal structures no
                     longer meets the demanding tasks for efficient design
                     space exploration. The compact and composable
                     model-based simulation provides a viable solution to
                     this difficult problem. However, building such thermal
                     models from detailed thermal structures was not well
                     addressed in the past. In this article, we propose a
                     new compact thermal modeling technique, called
                     ThermComp, standing for thermal modeling with
                     composable modules. ThermComp can be used for fast
                     thermal design space exploration for multicore
                     microprocessors. The new approach builds the composable
                     model from detailed structures for each basic module
                     using the finite difference method and reduces the
                     model complexity by the sampling-based model order
                     reduction technique. These composable models are then
                     used to assemble different multicore architecture
                     thermal models and realized into SPICE-like netlists.
                     The resulting thermal models can be simulated by the
                     general circuit simulator SPICE. ThermComp tries to
                     preserve the accuracy of fine-grained models with the
                     speed of coarse-grained models. Experimental results on
                     a number of multicore microprocessor architectures show
                     the new approach can easily build accurate thermal
                     systems from compact composable models for fast
                     architecture thermal analysis and optimization and is
                     much faster than the existing HotSpot method with
                     similar accuracy.},
  acknowledgement = ack-nhfb,
  articleno       = {28},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 29.  End page unknown (??); no DOI field recorded.
@article{Zeng:2013:IPD,
  author          = {Zhiyu Zeng and Suming Lai and Peng Li},
  title           = {{IC} power delivery: Voltage regulation and
                     conversion, system-level cooptimization and technology
                     implications},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {29:1--29:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Modern IC power delivery systems encompass large
                     on-chip passive power grids and active on-chip or
                     off-chip voltage converters and regulators. While there
                     exists little work targeting on holistic design of such
                     complex IC subsystems, the optimal system-level design
                     of power delivery is critical for achieving power
                     integrity and power efficiency. In this article, we
                     conduct a systematic design analysis on power delivery
                     networks that incorporate Buck Converters (BCs) and
                     on-chip Low-Dropout voltage regulators (LDOs) for the
                     entire chip power supply. The electrical interactions
                     between active voltage converters, regulators as well
                     as passive power grids and their influence on key
                     system design specifications are analyzed
                     comprehensively. With the derived design insights, the
                     system-level codesign of a complete power delivery
                     network is facilitated by a proposed automatic
                     optimization flow in which key design parameters of
                     buck converters and on-chip LDOs as well as on-chip
                     decoupling capacitance are jointly optimized. The
                     experimental results demonstrate significant
                     performance improvements resulted from the proposed
                     system cooptimization in terms of achievable area
                     overhead, supply noise and power efficiency. Impacts of
                     different decoupling capacitance technologies are also
                     investigated.},
  acknowledgement = ack-nhfb,
  articleno       = {29},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 30.  End page unknown (??); no DOI field recorded.
@article{Lee:2013:SRB,
  author          = {Ren-Jie Lee and Hung-Ming Chen},
  title           = {A study of row-based area-array {I/O} design planning
                     in concurrent chip-package design flow},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {30:1--30:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {IC-centric design flow has been a common paradigm when
                     designing and optimizing a system. Package and
                     board/system designs are usually followed by
                     almost-ready chip designs, which causes long
                     turn-around time communicating with package and system
                     houses. In this article, the realizations of area-array
                     I/O design methodologies are studied. Different from
                     IC-centric flow, we propose a chip-package concurrent
                     design flow to speed up the design time. Along with the
                     flow, we design the I/O-bump (and P/G-bump) tile that
                     combines I/O (and P/G) and bump into a hard macro with
                     the considerations of I/O power connection and
                     electrostatic discharge (ESD) protection. We then
                     employ an I/O-row based scheme to place I/O-bump tiles
                     with existed metal layers. By such a scheme, it reduces
                     efforts in I/O placement legalization and the
                     redistribution layer (RDL) routing. With the emphasis
                     on package design awareness, the proposed methods map
                     package balls onto chip I/Os, thus providing an
                     opportunity to design chip and package in parallel. Due
                     to this early study of I/O and bump planning, faster
                     convergence can be expected with concurrent design
                     flow. The results are encouraging and the merits of
                     this flow are reassuring.},
  acknowledgement = ack-nhfb,
  articleno       = {30},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 31.  End page unknown (??); no DOI field recorded.
@article{Guthaus:2013:RAP,
  author          = {Matthew R. Guthaus and Gustavo Wilke and Ricardo
                     Reis},
  title           = {Revisiting automated physical synthesis of
                     high-performance clock networks},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {31:1--31:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {High-performance clock distribution has been a
                     challenge for nearly three decades. During this time,
                     clock synthesis tools and algorithms have strove to
                     address a myriad of important issues helping designers
                     to create faster, more reliable, and more power
                     efficient chips. This work provides a complete
                     discussion of the high-performance ASIC clock
                     distribution using information gathered from both
                     leading industrial clock designers and previous
                     research publications. While many techniques are only
                     briefly explained, the references summarize the most
                     influential papers on a variety of topics for more
                     in-depth investigation. This article also provides a
                     thorough discussion of current issues in clock
                     synthesis and concludes with insight into future
                     research and design challenges for the community at
                     large.},
  acknowledgement = ack-nhfb,
  articleno       = {31},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 32.  End page unknown (??); no DOI field recorded.
@article{Gester:2013:BAD,
  author          = {Michael Gester and Dirk M{\"u}ller and Tim Nieberg and
                     Christian Panten and Christian Schulte and Jens Vygen},
  title           = {{BonnRoute}: Algorithms and data structures for fast
                     and good {VLSI} routing},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {32:1--32:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {We present the core elements of BonnRoute: advanced
                     data structures and algorithms for fast and
                     high-quality routing in modern technologies. Global
                     routing is based on a combinatorial approximation
                     scheme for min-max resource sharing. Detailed routing
                     uses exact shortest path algorithms, based on a
                     shape-based data structure for pin access and a
                     two-level track-based data structure for long-distance
                     connections. All algorithms are very fast. Compared to
                     an industrial router (on 32 nm and 22 nm chips),
                     BonnRoute is over two times faster, has 5 \% less
                     netlength, 20 \% less vias, and reduces detours by more
                     than 90 \%.},
  acknowledgement = ack-nhfb,
  articleno       = {32},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(2), article 33.  End page unknown (??); no DOI field recorded.
@article{Agarwal:2013:SDS,
  author          = {Amit Agarwal and Jason Cong and Brian Tagiku},
  title           = {The survivability of design-specific spare placement
                     in {FPGA} architectures with high defect rates},
  journal         = j-TODAES,
  volume          = {18},
  number          = {2},
  pages           = {33:1--33:??},
  month           = mar,
  year            = {2013},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Apr 5 18:40:42 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {We address the problem of optimizing fault tolerance
                     in FPGA architectures with high defect rates (such as
                     nano-FPGAs) without significantly degrading
                     performance. Our methods address fault tolerance during
                     the placement and reconfiguration stages of FPGA
                     programming. First, we provide several complexity
                     results for both the fault reconfiguration and
                     fault-tolerance placement problems. Then, we propose a
                     placement algorithm which, in the presence of randomly
                     generated faults, optimizes spare placement to maximize
                     the probability that the FPGA can be reconfigured to
                     meet a specified timing constraint. We also give
                     heuristics for reconfiguration after faults have been
                     detected. Despite the hardness results for both the
                     placement and reconfiguration problems, we show our
                     heuristics perform well in simulation (in one scenario,
                     increasing the probability of successful
                     reconfiguration by as much as 55\% compared to a
                     uniform spare placement).},
  acknowledgement = ack-nhfb,
  articleno       = {33},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(3), article 34.  End page unknown (??); DOI is stored in
%%% resolver-URL form, as in the neighbouring entries.
@article{Nadakuditi:2013:BAS,
  author          = {Raj Rao Nadakuditi and Igor L. Markov},
  title           = {On bottleneck analysis in stochastic stream
                     processing},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {34:1--34:??},
  month           = jul,
  year            = {2013},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2491477.2491478},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Past improvements in clock frequencies have
                     traditionally been obtained through technology scaling,
                     but most recent technology nodes do not offer such
                     benefits. Instead, parallelism has emerged as the key
                     driver of chip-performance growth. Unfortunately,
                     efficient simultaneous use of on-chip resources is
                     hampered by sequential dependencies, as illustrated by
                     Amdahl's law. Quantifying achievable parallelism in
                     terms of provable mathematical results can help prevent
                     futile programming efforts and guide innovation in
                     computer architecture toward the most significant
                     challenges. To complement Amdahl's law, we focus on
                     stream processing and quantify performance losses due
                     to stochastic runtimes. Using spectral theory of random
                     matrices, we derive new analytical results and validate
                     them by numerical simulations. These results allow us
                     to explore unique benefits of stochasticity and show
                     how and when they outweigh the costs for software
                     streams.},
  acknowledgement = ack-nhfb,
  articleno       = {34},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(3), article 35.  End page unknown (??); DOI is stored in
%%% resolver-URL form, as in the neighbouring entries.
@article{Abouelella:2013:HEI,
  author          = {Fatma Abouelella and Tom Davidson and Wim Meeus and
                     Karel Bruneel and Dirk Stroobandt},
  title           = {How to efficiently implement dynamic circuit
                     specialization systems},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {35:1--35:??},
  month           = jul,
  year            = {2013},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2491477.2491479},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Dynamic circuit specialization (DCS) is a technique
                     used to implement FPGA applications where some of the
                     input data, called parameters, change slowly compared
                     to other inputs. Each time the parameter values change,
                     the FPGA is reconfigured by a configuration that is
                     specialized for those new parameter values. This
                     specialized configuration is much smaller and faster
                     than a regular configuration. However, the overhead
                     associated with the specialization process should be
                     minimized to achieve the desired benefits of using the
                     DCS technique. This overhead is represented by both the
                     FPGA resources needed to specialize the FPGA at runtime
                     and by the specialization time. The introduction of
                     parameterized configurations [Bruneel and Stroobandt
                     2008] has improved the efficiency of DCS
                     implementations. However, the specialization overhead
                     still takes a considerable amount of resources and
                     time. In this article, we explore how to efficiently
                     build DCS systems by presenting a variety of possible
                     solutions for the specialization process and the
                     overhead associated with each of them. We split the
                     specialization process into two main phases: the
                     evaluation and the configuration phase. The PowerPC
                     embedded processor, the MicroBlaze, and a customized
                     processor (CP) are used as alternatives in the
                     evaluation phase. In the configuration phase, the ICAP
                     and a custom configuration interface (SRL
                     configuration) are used as alternatives. Each solution
                     is used to implement a DCS system for three
                     applications: an adaptive finite impulse response (FIR)
                     filter, a ternary content-addressable memory (TCAM),
                     and a regular expression matcher (RegEx). The
                     experiments show that the use of our CP along with the
                     SRL configuration achieves minimum overhead in terms of
                     resources and time. Our CP is 1.8 and 3.5 times smaller
                     than the PowerPC and the area-optimized implementation
                     of the MicroBlaze, respectively. Moreover, the use of
                     the CP enables a more compact representation for the
                     parameterized configuration in comparison to both the
                     PowerPC and the MicroBlaze processors. For instance, in
                     the FIR, the parameterized configuration compiled for
                     our CP is 6--7 times smaller than that for the embedded
                     processors.},
  acknowledgement = ack-nhfb,
  articleno       = {35},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(3), article 36.  End page unknown (??); DOI is stored in
%%% resolver-URL form, as in the neighbouring entries.
@article{Cabodi:2013:TBM,
  author          = {Gianpiero Cabodi and Sergio Nocco and Stefano Quer},
  title           = {Thread-based multi-engine model checking for multicore
                     platforms},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {36:1--36:??},
  month           = jul,
  year            = {2013},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2491477.2491480},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article describes a multithreaded,
                     portfolio-based approach to model checking, where
                     multiple cores are exploited as the underlying
                     computing framework to support concurrent execution of
                     cooperative engines. We introduce a portfolio-based
                     approach to model checking. Our portfolio is first
                     driven by an approximate runtime predictor that
                     provides a heuristic approximation to a perfect oracle
                     and suggests which engines are more suitable for each
                     verification instance. Scalability and robustness of
                     the overall model-checking effort highly rely on a
                     concurrent, multithreaded model of execution. Following
                     similar approaches in related application fields, we
                     dovetail data partitioning, focused on proving several
                     properties in parallel, and engine partitioning, based
                     on concurrent runs of different model-checking engines
                     competing for completion of the same problem. We
                     investigate concurrency not only to effectively exploit
                     several available engines, which operate independently,
                     but also to show that a cooperative effort is possible.
                     In this case, we adopt a straightforward, light-weight,
                     model of inter-engine communication and data sharing.
                     We provide a detailed description of the ideas,
                     algorithms, and experimental results obtained on the
                     benchmarks from the Hardware Model Checking Competition
                     suites (HWMCC'10 and HWMCC'11).},
  acknowledgement = ack-nhfb,
  articleno       = {36},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES 18(3), article 37.  End page unknown (??); DOI is stored in
%%% resolver-URL form, as in the neighbouring entries.
@article{Kim:2013:AMP,
  author          = {Sehwan Kim and Pai H. Chou},
  title           = {Analysis and minimization of power-transmission loss
                     in locally daisy-chained systems by local energy
                     buffering},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {37:1--37:??},
  month           = jul,
  year            = {2013},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2491477.2491481},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Power-transmission loss can be a severe problem for
                     low-power embedded systems organized in a daisy-chain
                     topology. The loss can be so high that it can result in
                     failure to power the load in the first place. The first
                     contribution of this article is a recursive algorithm
                     for solving the transmission current on each segment of
                     the daisy chain at a given supply voltage. It enables
                     solving not only the transmission loss but also reports
                     infeasible configurations if the voltage is too low.
                     Using this core algorithm, our second contribution is
                     to find energy-efficient configurations that use local
                     energy buffers (LEBs) to eliminate peak load on the bus
                     without relying on high voltage. Experimental results
                     confirm that our proposed techniques significantly
                     reduce the total energy consumption and enable the
                     deployed system to operate for significantly longer.},
  acknowledgement = ack-nhfb,
  articleno       = {37},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Gupta:2013:ECR,
  author          = {Saket Gupta and Sachin S. Sapatnekar},
  title           = {Employing circadian rhythms to enhance power and
    reliability},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {38:1--38:??},
  month           = jul,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2491477.2491482},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article presents a novel scheme for saving
    architectural power by mitigating delay degradations in
    digital circuits due to bias temperature instability
    (BTI), inspired by the notion of human circadian
    rhythms. The method works in two alternating phases. In
    the first, the compute phase, the circuit is awake and
    active, operating briskly at a greater-than-nominal
    supply voltage which causes tasks to complete more
    quickly. In the second, the idle phase, the circuit is
    power-gated and put to sleep, enabling BTI recovery.
    Since the wakeful stage works at an elevated supply
    voltage, it results in greater aging than operation at
    the nominal supply voltage, but the sleep state
    involves a recovery that more than compensates for this
    differential. We demonstrate, both at the circuit and
    the architectural levels, that at about the same
    performance, this approach can result in appreciable
    BTI mitigation, thus reducing the guardbands necessary
    to protect against aging, which results in power
    savings over the conventional design.},
  acknowledgement = ack-nhfb,
  articleno       = {38},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Tsai:2013:ROC,
  author          = {Mei-Hsiang Tsai and Po-Yang Hsu and Hung-Yi Li and
    Yi-Huang Hung and Yi-Yu Liu},
  title           = {Routability optimization for crossbar-switch
    structured {ASIC} design},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {39:1--39:??},
  month           = jul,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2491477.2491483},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In the routing architecture of a structured
    application-specific integrated circuit (ASIC), the
    crossbar is one of the most area-efficient switch
    blocks. Nevertheless, a dangling wire occurs when there
    is a routing bend in a crossbar switch. Dangling wires
    incur longer wire lengths as well as a higher
    interconnection capacitance. In this article, we tackle
    dangling wire issues for structured ASIC routability
    optimization. We first propose a compact graph model
    for crossbar-switch routing. With our graph model,
    switch connectivity relations can be removed to keep
    the 2D structured ASIC routing graph efficient and to
    speed up the runtime of our routing algorithm.
    Furthermore, we propose a heuristic
    dangling-wire-avoidance routing framework containing
    deferred pin assignment, Steiner point reassignment,
    and anchor pair insertion in order to minimize dangling
    wires and channel width. Finally, in order to take
    routing bends and channel width into account
    simultaneously, we propose concurrent and sequential
    integer linear programming (ILP) formulations and ILP
    variable/constraint degeneration techniques. The
    experimental results demonstrate that our proposed
    heuristic routing framework reduces dangling wires by
    19\%, channel width by 38\%, and wire length by 13\% to
    VPR using the crossbar switch (VPR-C). In addition, our
    sequential ILP router reduces dangling wires by 38\%,
    channel width by 40\%, and wire length by 15\% compared
    to VPR-C. Thus, the runtime efficiency of our
    sequential ILP router is attractive for crossbar-switch
    structured ASIC routing.},
  acknowledgement = ack-nhfb,
  articleno       = {39},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Liu:2013:ABF,
  author          = {Sean Shih-Ying Liu and Wan-Ting Lo and Chieh-Jui Lee
    and Hung-Ming Chen},
  title           = {Agglomerative-based flip-flop merging and relocation
    for signal wirelength and clock tree optimization},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {40:1--40:??},
  month           = jul,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2491477.2491484},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In this article, we propose a flip-flop merging
    algorithm based on agglomerative clustering. Compared
    to previous state-of-the-art on flip-flop merging, our
    proposed algorithm outperforms that of Chang et al.
    [2010] and Wang et al. [2011] in all aspects, including
    number of flip-flop reductions, increase in signal
    wirelength, displacement of flip-flops, and execution
    time. Our proposed algorithm also has minimal
    disruption to original placement. In comparison with
    Jiang et al. [2011], Wang et al. [2011], and Chang et
    al. [2010], our proposed algorithm has the least
    displacement when relocating merged flip-flops. While
    previous works on flip-flop merging focus on the number
    of flip-flop reduction, we further evaluate the power
    consumption of clock tree after flip-flop merging. To
    further minimize clock tree wirelength, we propose a
    framework that determines a preferable location for
    relocated merged flip-flops for clock tree synthesis
    (CTS). Experimental results show that our CTS-driven
    flip-flop merging can reduce clock tree wirelength by
    an average of 7.82\% with minimum clock network power
    consumption compared to all of the previous works.},
  acknowledgement = ack-nhfb,
  articleno       = {40},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Lee:2013:EMA,
  author          = {Yu-Min Lee and Pei-Yu Huang},
  title           = {An efficient method for analyzing on-chip thermal
    reliability considering process variations},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {41:1--41:??},
  month           = jul,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2491477.2491485},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This work provides an efficient statistical
    electrothermal simulator for analyzing on-chip thermal
    reliability under process variations. Using the
    collocation-based statistical modeling technique,
    first, the statistical interpolation polynomial for
    on-chip temperature distribution can be obtained by
    performing deterministic electrothermal simulation very
    few times and by utilizing polynomial interpolation.
    After that, the proposed simulator not only provides
    the mean and standard deviation profiles of on-chip
    temperature distribution, but also innovates the
    concept of thermal yield profile to statistically
    characterize the on-chip temperature distribution more
    precisely, and builds an efficient technique for
    estimating this figure of merit. Moreover, a mixed-mesh
    strategy is presented to further enhance the efficiency
    of the developed statistical electrothermal simulator.
    Experimental results demonstrate that (1) the developed
    statistical electrothermal simulator can obtain
    accurate approximations with orders of magnitude
    speedup over the Monte Carlo method; (2) comparing with
    a well-known cumulative distribution function
    estimation method, APEX [Li et al. 2004], the developed
    statistical electrothermal simulator can achieve 215$
    \times $ speedup with better accuracy; (3) the
    developed mixed-mesh strategy can achieve an order of
    magnitude faster over our baseline algorithm and still
    maintain an acceptable accuracy level.},
  acknowledgement = ack-nhfb,
  articleno       = {41},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Shi:2013:OSC,
author = "Yiyu Shi and Jinjun Xiong and Vladimir Zolotov and
Chandu Visweswariah",
title = "Order statistics for correlated random variables and
its application to at-speed testing",
journal = j-TODAES,
volume = "18",
number = "3",
pages = "42:1--42:??",
month = jul,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2491477.2491486",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jul 27 08:09:07 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Although order statistics have been studied for
several decades, most of the results are based on the
assumption of independent and identically distributed
(i.i.d.) random variables. In the literature, how to
compute the $m$th order statistics of $n$ correlated
random variables is still a problem. This article
proposes a recursive algorithm based on statistical
min/max operations to compute order statistics for
general correlated and not necessarily identically
distributed random variables. The algorithm has an {$
O(m n) $} time complexity and {$ O(m + n) $} space
complexity. A binary tree-based data structure is
further developed to allow selective update of the
order statistics with {$ O(n m^2) $} time. As a vehicle
to demonstrate the algorithm, we apply it to the path
selection algorithm in at-speed testing. A novel metric
multilayer process space coverage metric is proposed to
quantitatively gauge the quality of path selection. We
then show that such a metric is directly linked to the
order statistics, and our recursive algorithm can thus
be applied. By employing a branch-and-bound path
selection algorithm with these techniques, this article
shows that selecting an optimal set of paths for a
multimillion-gate design can be performed efficiently.
Compared to the state of the art, experimental results
show both the efficiency of our algorithms and better
quality of our path selection.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhao:2013:PSA,
author = "Wei Zhao and Junxia Ma and Mohammad Tehranipoor and
Sreejit Chakravarty",
title = "Power-safe application of {TDF} patterns to flip-chip
designs during wafer test",
journal = j-TODAES,
volume = "18",
number = "3",
pages = "43:1--43:??",
month = jul,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2491477.2491487",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jul 27 08:09:07 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Due to high switching activities in test mode, circuit
power consumption is higher than its functional
operation. Large switching in the circuit during
launch-to-capture cycles not only negatively impacts
circuit performance causing overkill, but could also
burn tester probes during wafer test due to the
excessive current they must drive. It is necessary to
develop a quick and effective method for evaluating
each pattern, identifying high-power patterns
considering functional and tester probes' current
limits and making the final pattern set power-safe.
Compared with previous low-power methods that deal with
scan structure modification or pattern filling
techniques, the new proposed method takes into account
layout information and resistance in the power
distribution network and can identify peak current
among C4 power bumps. Post-processing steps replace
power-unsafe patterns with low-power ones. The final
pattern set provides considerable peak current
reduction while fault coverage is maintained.",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xiang:2013:TCS,
  author          = {Dong Xiang and Jianbo Li and Krishnendu Chakrabarty
    and Xijiang Lin},
  title           = {Test compaction for small-delay defects using an
    effective path selection scheme},
  journal         = j-TODAES,
  volume          = {18},
  number          = {3},
  pages           = {44:1--44:??},
  month           = jul,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2491477.2491488},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Sat Jul 27 08:09:07 MDT 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Testing for small-delay defects (SDDs) requires
    fault-effect propagation along the longest testable
    paths. However, identification of the longest testable
    paths requires high CPU time, and the sensitization of
    all such paths leads to large pattern counts. Dynamic
    test compaction for small-delay defects is therefore
    necessary to reduce test-data volume. We present a new
    technique for identifying the longest testable paths
    through each gate in order to accelerate test
    generation for SDDs. The resulting test patterns
    sensitize the longest testable paths that pass through
    each SDD site. An efficient dynamic test compaction
    method based on structural analysis is presented to
    reduce the pattern count substantially, while ensuring
    that all the longest paths for each SDD are sensitized.
    Simulation results for a set of ISCAS 89 and IWLS 05
    benchmark circuits demonstrate the effectiveness of
    this method.},
  acknowledgement = ack-nhfb,
  articleno       = {44},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% NOTE(review): article number 44 (pages and articleno below) is also
%%% used by Xiang:2013:TCS in volume 18, number 3, while the next entry
%%% in number 4 is article 45 -- confirm the intended number against
%%% the publisher's table of contents.
@Article{Anonymous:2013:CNE,
  author          = {Anonymous},
  title           = {Call for nominations for {Editor-in-Chief}},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {44:1--44:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2541012.2541672},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {44},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Marculescu:2013:ESS,
author = "Diana Marculescu and Chita Das",
title = "Editorial to special section on networks on chip:
architecture, tools, and methodologies",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "45:1--45:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2541012.2541013",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bogdan:2013:DPM,
  author          = {Paul Bogdan and Radu Marculescu and Siddharth Jain},
  title           = {Dynamic power management for multidomain
    system-on-chip platforms: an optimal control approach},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {46:1--46:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2504904},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Reducing energy consumption in multiprocessor
    systems-on-chip (MPSoCs) where communication happens
    via the network-on-chip (NoC) approach calls for
    multiple voltage/frequency island (VFI)-based designs.
    In turn, such multi-VFI architectures need efficient,
    robust, and accurate runtime control mechanisms that
    can exploit the workload characteristics in order to
    save power. Despite being tractable, the linear control
    models for power management cannot capture some
    important workload characteristics (e.g., fractality,
    nonstationarity) observed in heterogeneous NoCs; if
    ignored, such characteristics lead to inefficient
    communication and resources allocation, as well as high
    power dissipation in MPSoCs. To mitigate such
    limitations, we propose a new paradigm shift from power
    optimization based on linear models to control
    approaches based on fractal-state equations. As such,
    our approach is the first to propose a controller for
    fractal workloads with precise constraints on state and
    control variables and specific time bounds. Our results
    show that significant power savings can be achieved at
    runtime while running a variety of benchmark
    applications.},
  acknowledgement = ack-nhfb,
  articleno       = {46},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Chen:2013:NMC,
  author          = {Xi Chen and Zheng Xu and Hyungjun Kim and Paul Gratz
    and Jiang Hu and Michael Kishinevsky and Umit Ogras},
  title           = {In-network monitoring and control policy for {DVFS} of
    {CMP} networks-on-chip and last level caches},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {47:1--47:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2504905},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In chip design today and for a foreseeable future, the
    last-level cache and on-chip interconnect is not only
    performance critical but also a substantial power
    consumer. This work focuses on employing dynamic
    voltage and frequency scaling (DVFS) policies for
    networks-on-chip (NoC) and shared, distributed
    last-level caches (LLC). In particular, we consider a
    practical system architecture where the distributed LLC
    and the NoC share a voltage/frequency domain that is
    separate from the core domain. This architecture
    enables the control of the relative speed between the
    cores and memory hierarchy without introducing
    synchronization delays within the NoC. DVFS for this
    architecture is more complex than individual
    link/core-based DVFS since it involves spatially
    distributed monitoring and control. We propose an
    average memory access time (AMAT)-based monitoring
    technique and integrate it with DVFS based on PID
    control theory. Simulations on PARSEC benchmarks yield
    a 27\% energy savings with a negligible impact on
    system performance.},
  acknowledgement = ack-nhfb,
  articleno       = {47},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Lee:2013:AVC,
  author          = {Jaekyu Lee and Si Li and Hyesoon Kim and Sudhakar
    Yalamanchili},
  title           = {Adaptive virtual channel partitioning for
    network-on-chip in heterogeneous architectures},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {48:1--48:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2504906},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Current heterogeneous chip-multiprocessors (CMPs)
    integrate a GPU architecture on a die. However, the
    heterogeneity of this architecture inevitably exerts
    different pressures on shared resource management due
    to differing characteristics of CPU and GPU cores. We
    consider how to efficiently share on-chip resources
    between cores within the heterogeneous system, in
    particular the on-chip network. Heterogeneous
    architectures use an on-chip interconnection network to
    access shared resources such as last-level cache tiles
    and memory controllers, and this type of on-chip
    network will have a significant impact on performance.
    In this article, we propose a feedback-directed virtual
    channel partitioning (VCP) mechanism for on-chip
    routers to effectively share network bandwidth between
    CPU and GPU cores in a heterogeneous architecture. VCP
    dedicates a few virtual channels to CPU and GPU
    applications with separate injection queues. The
    proposed mechanism balances on-chip network bandwidth
    for applications running on CPU and GPU cores by
    adaptively choosing the best partitioning
    configuration. As a result, our mechanism improves
    system throughput by 15\% over the baseline across 39
    heterogeneous workloads.},
  acknowledgement = ack-nhfb,
  articleno       = {48},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Abousamra:2013:OCE,
  author          = {Ahmed Abousamra and Alex K. Jones and Rami Melhem},
  title           = {Ordering circuit establishment in multiplane {NoCs}},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {49:1--49:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2500752},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Segregating networks-on-chips (NoCs) into data and
    control planes yields several opportunities for
    improving power and performance in chip-multiprocessor
    systems (CMPs). This article describes a hybrid
    packet/circuit switched multiplane network optimized to
    reduce latency in order to improve system performance
    and/or reduce system energy. Unlike traditional circuit
    preallocation techniques which require timestamps to
    reserve circuit resources, this article proposes an
    order-based preallocation scheme. By enforcing the
    order in which resources are scheduled and utilized
    rather than a fixed time, the NoC can take advantage of
    messages that arrive early while naturally tolerating
    message delays due to contention. Ordered circuit
    establishment is presented using two techniques. First,
    D{\'e}j{\`a} Vu switching preestablishes circuits for
    data messages once a cache hit is detected and prior to
    the requested data becoming available. Second, using
    Red Carpet Routing, circuits are proactively reserved
    for a return data message as a request message
    traverses the NoC. The reduced communication latency
    over configured circuits enable system performance
    improvement or saving NoC energy by reducing voltage
    and frequency without sacrificing performance. In
    simulations of 16 and 64 core CMPs, D{\'e}j{\`a} Vu
    switching enabled average NoC energy savings of 43\%
    and 53\% respectively. On the other hand, simulations
    of communication sensitive benchmarks using Red Carpet
    Routing show speedup in execution time of up to 16\%,
    with an average of 10\% over a purely packet switched
    NoC and an average of 8\% over preconfiguring circuits
    using D{\'e}j{\`a} Vu switching.},
  acknowledgement = ack-nhfb,
  articleno       = {49},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Lee:2013:DRN,
  author          = {Jinho Lee and Dongwoo Lee and Sunwook Kim and Kiyoung
    Choi},
  title           = {Deflection routing in {$3$D} network-on-chip with
    limited vertical bandwidth},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {50:1--50:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2505011},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This article proposes a deflection routing for 3D NoC
    with serialized TSVs for vertical links. Compared to
    buffered routing, deflection routing provides area- and
    power-efficient communication and little loss of
    performance under low to medium traffic load. Under 3D
    environments, the deflection routing can yield even
    better performance than buffered routing when key
    aspects are properly taken into account. However, the
    existing deflection routing technique cannot be
    directly applied because the serialized TSV links will
    take longer time to send data than ordinary planar
    links and cause many problems. A naive deflection
    through a TSV link can cause significantly longer
    latency and more energy consumption even for
    communications through planar links. This article
    proposes a method to mitigate the effect and also solve
    arising deadlock and livelock problems. Evaluation of
    the proposed scheme shows its effectiveness in
    throughput, latency, and energy consumption.},
  acknowledgement = ack-nhfb,
  articleno       = {50},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Shojaei:2013:FSM,
  author          = {Hamid Shojaei and Twan Basten and Marc Geilen and
    Azadeh Davoodi},
  title           = {A fast and scalable multidimensional multiple-choice
    knapsack heuristic},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {51:1--51:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2541012.2541014},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Many combinatorial optimization problems in the
    embedded systems and design automation domains involve
    decision making in multidimensional spaces. The
    multidimensional multiple-choice knapsack problem
    (MMKP) is among the most challenging of the encountered
    optimization problems. MMKP problem instances appear
    for example in chip multiprocessor runtime resource
    management and in global routing of wiring in circuits.
    Chip multiprocessor resource management requires
    solving MMKP under real-time constraints, whereas
    global routing requires scalability of the solution
    approach to extremely large MMKP instances. This
    article presents a novel MMKP heuristic, CPH (for
    Compositional Pareto-algebraic Heuristic), which is a
    parameterized compositional heuristic based on the
    principles of Pareto algebra. Compositionality allows
    incremental computation of solutions. The
    parameterization allows tuning of the heuristic to the
    problem at hand. These aspects make CPH a very
    versatile heuristic. When tuning CPH for computation
    time, MMKP instances can be solved in real time with
    better results than the fastest MMKP heuristic so far.
    When tuning CPH for solution quality, it finds several
    new solutions for standard benchmarks that are not
    found by any existing heuristic. CPH furthermore scales
    to extremely large problem instances. We illustrate and
    evaluate the use of CPH in both chip multiprocessor
    resource management and in global routing.},
  acknowledgement = ack-nhfb,
  articleno       = {51},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Yoon:2013:ACC,
  author          = {Jonghee W. Yoon and Jongeun Lee and Sanghyun Park and
    Yongjoo Kim and Jinyong Lee and Yunheung Paek and
    Doosan Cho},
  title           = {Architecture customization of on-chip reconfigurable
    accelerators},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {52:1--52:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2493384},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Integrating coarse-grained reconfigurable
    architectures (CGRAs) into a System-on-a-Chip (SoC)
    presents many benefits as well as important challenges.
    One of the challenges is how to customize the
    architecture for the target applications efficiently
    and effectively without performing explicit design
    space exploration. In this article we present a novel
    methodology for incremental interconnect customization
    of CGRAs that can suggest a new interconnection
    architecture which is able to maximize the performance
    for a given set of application kernels while minimizing
    the hardware cost. In our methodology, we translate the
    problem of interconnect customization into that of
    inexact graph matching, and we devised a heuristic for
    A* search algorithm to efficiently solve the inexact
    graph matching problem. Our experimental results
    demonstrate that our customization method can quickly
    find application-optimized interconnections that
    exhibit 80\% higher performance on average compared to
    the base architecture which has mesh interconnections,
    with little energy and hardware increase in
    interconnections and muxes.},
  acknowledgement = ack-nhfb,
  articleno       = {52},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Jeyapaul:2013:EEE,
  author          = {Reiley Jeyapaul and Aviral Shrivastava},
  title           = {Enabling energy efficient reliability in embedded
    systems through smart cache cleaning},
  journal         = j-TODAES,
  volume          = {18},
  number          = {4},
  pages           = {53:1--53:??},
  month           = oct,
  year            = {2013},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2505012},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Fri Nov 8 11:45:54 MST 2013},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Incessant and rapid technology scaling has brought us
    to a point where today's, and future transistors are
    susceptible to transient errors induced by energy
    carrying particles, called soft errors. Within a
    processor, the sheer size and nature of data in the
    caches render it most vulnerable to electrical
    interference on data stored in the cache. Data in the
    cache is vulnerable to corruption by soft errors, for
    the time it remains actively unused in the cache.
    Write-through and early-write-back [Li et al. 2004]
    cache configurations reduce the time for vulnerable
    data in the cache, at the cost of increased memory
    writes and thereby energy. We propose a smart cache
    cleaning methodology, that enables copying of only
    specific vulnerable cache blocks into the memory at
    chosen times, thereby ensuring data cache protection
    with minimal memory writes. In this work, we first
    propose a hybrid (software-hardware) methodology. We
    then propose an improved software solution that
    utilizes cache write-back functionality available in
    commodity processors; thereby reducing the hardware
    overhead required to implement smart cache cleaning for
    such systems. The parameters involved in the
    implementation of our Smart Cache Cleaning (SCC)
    technique enable a means to provide for customizable
    energy-efficient soft error reduction in the L1 data
    cache. Given the system requirements of reliability,
    power-budget and runtime priority of the application,
    appropriate parameters of the SCC can be customized to
    trade-off power consumption and L1 data cache
    reliability. Our experiments over LINPACK and Livermore
    benchmarks demonstrate 26\% reduced
    energy-vulnerability product (energy-efficient
    vulnerability reduction) compared to that of hardware
    based cache reliability techniques. Our software-only
    solution achieves same levels of reliability with an
    additional 28\% performance improvement.},
  acknowledgement = ack-nhfb,
  articleno       = {53},
  fjournal        = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Kadayif:2013:HSA,
author = "Ismail Kadayif and Mahir Turkcan and Seher Kiziltepe
and Ozcan Ozturk",
title = "Hardware\slash software approaches for reducing the
process variation impact on instruction fetches",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "54:1--54:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2489778",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As technology moves towards finer process geometries,
it is becoming extremely difficult to control critical
physical parameters such as channel length, gate oxide
thickness, and dopant ion concentration. Variations in
these parameters lead to dramatic variations in access
latencies in Static Random Access Memory (SRAM)
devices. This means that different lines of the same
cache may have different access latencies. A simple
solution to this problem is to adopt the worst-case
latency paradigm. While this egalitarian cache
management is simple, it may introduce significant
performance overhead during instruction fetches when
both address translation (instruction Translation
Lookaside Buffer (TLB) access) and instruction cache
access take place, making this solution infeasible for
future high-performance processors. In this study, we
first propose some hardware and software enhancements
and then, based on those, investigate several
techniques to mitigate the effect of process variation
on the instruction fetch pipeline stage in modern
processors. For address translation, we study an
approach that performs the virtual-to-physical page
translation once, then stores it in a special register,
reusing it as long as the execution remains on the same
instruction page. To handle varying access latencies
across different instruction cache lines, we annotate
the cache access latency of instructions within
themselves to give the circuitry a hint about how long
to wait for the next instruction to become available.",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2013:EWD,
author = "Guanying Wu and Xubin He and Ningde Xie and Tong
Zhang",
title = "Exploiting workload dynamics to improve {SSD} read
latency via differentiated error correction codes",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "55:1--55:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2489792",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents a cross-layer codesign approach
to reduce SSD read response latency. The key is to
cohesively exploit the NAND flash memory device write
speed vs. raw storage reliability trade-off at the
physical layer and runtime data access workload
dynamics at the system level. Leveraging runtime data
access workload variation, we can opportunistically
slow down NAND flash memory write speed and hence
improve NAND flash memory raw storage reliability. This
naturally enables an opportunistic use of weaker error
correction schemes that can directly reduce SSD read
access latency. We develop a disk-level scheduling
scheme to effectively smooth the write workload in
order to maximize the occurrence of runtime
opportunistic NAND flash memory write slowdown. Using 2
bits/cell NAND flash memory with BCH-based error
correction as a test vehicle, we carry out
extensive simulations over various workloads and
demonstrate that this developed cross-layer co-design
solution can reduce the average SSD read latency by up
to 59.4\% without sacrificing the write throughput
performance.",
acknowledgement = ack-nhfb,
articleno = "55",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2013:IBM,
author = "Po-Chun Huang and Yuan-Hao Chang and Tei-Wei Kuo",
title = "An index-based management scheme with adaptive caching
for huge-scale low-cost embedded flash storages",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "56:1--56:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2505013",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Due to its remarkable access performance, shock
resistance, and costs, NAND flash memory is now widely
adopted in a variety of computing environments,
especially in mobile devices such as smart phones,
media players and electronic book readers. For the
consideration of costs, low-cost embedded flash
storages such as flash memory cards are often employed
on such devices. Different from solid-state disks, the
RAM buffer equipped on low-cost embedded flash storages
are very small, for example, limited under several
dozens of kilobytes, despite of the rapidly growing
capacity of the storages. The significance of
effectively utilizing the very limited on-device RAM
buffers of embedded flash storages is therefore
highlighted, and a novel design of scalable flash
management schemes is needed to tackle the new access
constraints of MLC NAND flash memory. In this work, a
highly scalable design of the flash translation layer
is presented with the considerations of the on-device
RAM size, user access patterns,
address-mapping-information caching and MLC access
constraints. Through a series of experiments, it is
verified that, with appropriate settings of cache
sizes, the proposed management scheme provides
comparable performance results to prior arts with much
lower requirements on the on-device RAM. In other
words, the proposed scheme suggests a strategy to make
better use of the on-device RAM, and is suitable for
embedded flash storages.",
acknowledgement = ack-nhfb,
articleno = "56",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhao:2013:CSL,
author = "Bo Zhao and Jun Yang and Youtao Zhang and Yiran Chen
and Hai Li",
title = "Common-source-line array: an area efficient memory
architecture for bipolar nonvolatile devices",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "57:1--57:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2500459",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Traditional array organization of bipolar nonvolatile
memories such as STT-MRAM and memristor utilizes two
bitlines for cell manipulations. With technology
scaling, such bitline pair will soon become the
bottleneck for further density improvement. In this
article we propose a novel common-source-line array
architecture, which uses a shared source-line along the
row, leaving only one bitline per column. We elaborate
the array design to ensure reliability, and demonstrate
its effectiveness on STT-MRAM and memristor memory
arrays. Our study results show that with comparable
latency and energy, the proposed common-source-line
array can save 34\% and 33\% area for Memristor-RAM and
STT-MRAM respectively, compared with corresponding
dual-bitline arrays.",
acknowledgement = ack-nhfb,
articleno = "57",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{DaRolt:2013:NDS,
author = "Jean {Da Rolt} and Giorgio {Di Natale} and Marie-Lise
Flottes and Bruno Rouzeyre",
title = "A novel differential scan attack on advanced {DFT}
structures",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "58:1--58:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2505014",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Scan chains insertion is the most common technique to
ensure the testability of digital cores, providing high
fault coverage. However, for ICs dealing with secret
information, scan chains can be used as back doors for
accessing secret data thus becoming a threat to system
security. So far, advanced test structures used to
reduce test costs (e.g., response compaction) and
achieve high fault coverage (e.g., X's masking decoder)
have been considered as intrinsic countermeasures
against these threats. This work proposes a new generic
scan-based attack demonstrating that these test
structures are not sufficiently effective to prevent
leakage through the test infrastructure. This generic
attack can be easily adapted to several cryptographic
implementations for both symmetric and public key
algorithms. The proposed attack is demonstrated on
several ciphers.",
acknowledgement = ack-nhfb,
articleno = "58",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2013:PDS,
author = "Yao-Lin Chang and I-Lun Tseng",
title = "A parallel dual-scanline algorithm for partitioning
parameterized 45-degree polygons",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "59:1--59:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2505015",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In order to use rectangular corner stitching data
structures in storing parameterized orthogonal layouts,
parameterized polygons in the layouts must be
partitioned into rectangles. Likewise, in order to use
trapezoidal corner stitching data structures in storing
parameterized 45-degree layouts, parameterized polygons
in the layouts have to be partitioned into trapezoids.
In this article, a parallel polygon partitioning
algorithm is proposed; the algorithm is capable of
partitioning parameterized orthogonal polygons into
parameterized rectangles as well as partitioning
parameterized 45-degree polygons into parameterized
trapezoids. Additionally, the algorithm can be used to
partition fixed-coordinate polygons. By adopting the
dual-scanline technique, which involves using two
scanlines to concurrently sweep an input polygon, the
parallel partitioning algorithm can process vertices
and edges of the input polygon efficiently. The
parallel polygon partitioning algorithm has been
implemented in C++ with the use of OpenMP. Compared
with a sequential partitioning program which uses a
single scanline, our parallel partitioning program can
achieve 20\% to 30\% speedup while partitioning large
parameterized polygons or partitioning parameterized
polygons with complex constraints.",
acknowledgement = ack-nhfb,
articleno = "59",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ramanujam:2013:DBC,
author = "Rohit Sunkam Ramanujam and Bill Lin",
title = "Destination-based congestion awareness for adaptive
routing in {$2$D} mesh networks",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "60:1--60:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2505055",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The choice of routing algorithm plays a vital role in
the performance of on-chip interconnection networks.
Adaptive routing is appealing because it offers better
latency and throughput than oblivious routing,
especially under nonuniform and bursty traffic. The
performance of an adaptive routing algorithm is
determined by its ability to accurately estimate
congestion in the network. In this regard, maintaining
global congestion state using a separate monitoring
network offers better congestion visibility into
distant parts of the network compared to solutions
relying only on local congestion. However, the main
challenge in designing such routing schemes is to keep
the logic and bandwidth overhead as low as possible to
fit into the tight power, area, and delay budgets of
on-chip routers. In this article, we propose a minimal
destination-based adaptive routing strategy (DAR),
where every node estimates the delay to every other
node in the network, and routing decisions are based on
these per-destination delay estimates. DAR outperforms
Regional Congestion Awareness (RCA), the best
previously known adaptive routing algorithm that uses
nonlocal congestion state. The performance improvement
is brought about by maintaining fine-grained
per-destination delay estimates in DAR that are more
accurate than regional congestion metrics measured in
RCA. The increased accuracy is a consequence of the
fact that the per-destination delay estimates are not
corrupted by congestion on links outside the admissible
routing paths to the destination. A scalable version of
DAR, referred to as SDAR, is also proposed for
minimizing the overheads associated with DAR in large
network topologies. We show that DAR outperforms local
adaptive routing by up to 79\% and RCA by up to 58\% in
terms of latency on SPLASH-2 benchmarks. DAR and SDAR
also outperform existing adaptive and oblivious routing
algorithms in latency and throughput under synthetic
traffic patterns on 8$ \times $8 and 16$ \times $16 mesh
topologies, respectively.",
acknowledgement = ack-nhfb,
articleno = "60",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yan:2013:RAG,
author = "Tan Yan and Qiang Ma and Scott Chilstedt and Martin D.
F. Wong and Deming Chen",
title = "A routing algorithm for graphene nanoribbon circuit",
journal = j-TODAES,
volume = "18",
number = "4",
pages = "61:1--61:??",
month = oct,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2505056",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 8 11:45:54 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Conventional CMOS devices are facing an increasing
number of challenges as their feature sizes scale down.
Graphene nanoribbon (GNR) based devices are shown to be
a promising replacement of traditional CMOS at future
technology nodes. However, all previous works on GNRs
focus at the device level. In order to integrate these
devices into electronic systems, routing becomes a key
issue. In this article, the GNR routing problem is
studied for the first time. We formulate the GNR
routing problem as a minimum hybrid-cost shortest path
problem on triangular mesh (``hybrid'' means that we
need to consider both the length and the bending of the
routing path). We show that by graph expansion, this
minimum hybrid-cost shortest path problem can be solved
by applying the conventional shortest path algorithm on
the expanded graph. Experimental results show that our
GNR routing algorithm effectively handles the hybrid
cost.",
acknowledgement = ack-nhfb,
articleno = "61",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ayoub:2013:CCM,
author = "Raid Ayoub and Rajib Nath and Tajana Simunic Rosing",
title = "{CoMETC}: Coordinated management of energy\slash
thermal\slash cooling in servers",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "1:1--1:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2534381",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We introduce a Coordinated Management of Energy,
Thermal, and Cooling (CoMETC) technique to minimize
cooling and memory energy of server machines.
State-of-the-art solutions decouple the optimization of
cooling energy costs and energy consumption of CPU and
memory subsystems. This results in suboptimal solutions
due to thermal dependencies between CPU and memory and
the nonlinearity in energy costs of cooling. In
contrast, we develop a unified solution that integrates
energy, thermal, and cooling management for CPU and
memory subsystems to maximize energy savings. CoMETC
reduces the operational energy of the memory by
clustering active memory pages to a subset of memory
modules while accounting for thermal and cooling
aspects. At the same time, CoMETC removes hotspots
between and within the CPU sockets and reduces the
effects of thermal coupling with memory in order to
minimize cooling energy costs. We design CoMETC using a
control-theoretic approach to guarantee meeting these
objectives. We introduce a formal thermal and cooling
model to be used for online decisions inside CoMETC.
Our experimental results show that CoMETC achieves
average cooling and memory energy savings of 58\%
compared to state-of-the-art techniques at a
performance overhead of less than 0.3\%.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Al-Dujaily:2013:DPB,
author = "Ra'ed Al-Dujaily and Nizar Dahir and Terrence Mak and
Fei Xia and Alex Yakovlev",
title = "Dynamic programming-based runtime thermal management
{(DPRTM)}: an online thermal control strategy for
{$3$D-NoC} systems",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "2:1--2:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2534382",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Complex thermal behavior inhibits the advancement of
three-dimensional (3D) very-large-scale-integration
(VLSI) system designs, as it could lead to ultra-high
temperature hotspots and permanent silicon device
damage. This article introduces a new runtime thermal
management strategy to effectively diffuse and manage
heat throughout 3D chip geometry for a better
throughput performance in networks on chip (NoC). This
strategy employs a dynamic programming-based runtime
thermal management (DPRTM) policy to provide online
thermal regulation. Reactive and proactive adaptive
schemes are integrated to optimize the routing pathways
depending on the critical temperature thresholds and
traffic developments. Also, when the critical system
thermal limit is violated, an urgent throttling will
take place. The proposed DPRTM is rigorously evaluated
through cycle-accurate simulations, and results show
that the proposed approach outperforms conventional
approaches in terms of computational efficiency and
thermal stability. For example, the system throughput
using the DPRTM approach can be improved by 33\% when
compared to other adaptive routing strategies for a
given thermal constraint. Moreover, the DPRTM
implementation presented in this article demonstrates
that the hardware overhead is insignificant. This work
opens a new avenue for exploring the on-chip
adaptability and thermal regulation for future
large-scale and 3D many-core integrations.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2013:IPP,
author = "Yen-Jen Chang and Hsiang-Yu Lu",
title = "Improving the performance of port range check for
network packet filtering",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "3:1--3:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2523069",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article introduces a high-performance packet
filter design in which we propose the partial parallel
range check (PPRC) technique for speeding up port range
check. Unlike the conventional serial design that uses
cascading cells to perform the serial check, PPRC
divides the single path into several segments. All PPRC
segments perform the range compare simultaneously, that
is, parallel check, and then the results of each
segment are serialized to generate the final check
result. Besides theoretical analyses, we also use UMC
90nm CMOS process to implement the PPRC design and
verify its effect on the check performance. Compared to
state-of-the-art range check techniques, the results
show that the PPRC design with the best configuration
can improve check performance by 28\%, at least. In
addition, the PPRC design is more stable and energy
efficient than related designs, even though it requires
more transistors to implement the peripheral circuitry.
The range of energy improvement achieved by the PPRC
design is about 35\%--70\%.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kritikakou:2013:NOS,
author = "Angeliki Kritikakou and Francky Catthoor and Vasilios
Kelefouras and Costas Goutis",
title = "Near-optimal and scalable intrasignal in-place
optimization for non-overlapping and irregular access
schemes",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "4:1--4:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2534383",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Storage-size management techniques aim to reduce the
resources required to store elements and to
concurrently provide efficient addressing during
element accessing. Existing techniques are less
appropriate for large iteration spaces with increased
numbers of irregularly spread holes. They either have
to approximate the accessed regions, leading to
overestimation of the final resources, or they require
prohibited exploration time to find the storage size.
In this work, we present a near-optimal and scalable
methodology for storage-size, intrasignal, in-place
optimization, that is, to compute the minimum amount of
resources required to store the elements of a group
(array), for irregular complex access schemes in the
target domain of non-overlapping store and load
accesses.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2013:LEV,
author = "Jianhua Li and Liang Shi and Qingan Li and Chun Jason
Xue and Yiran Chen and Yinlong Xu and Wei Wang",
title = "Low-energy volatile {STT-RAM} cache design using
cache-coherence-enabled adaptive refresh",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "5:1--5:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2534393",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Spin-Torque Transfer RAM (STT-RAM) is a promising
candidate for SRAM replacement because of its excellent
features, such as fast read access, high density, low
leakage power, and CMOS technology compatibility.
However, wide adoption of STT-RAM as cache memories is
impeded by its long write latency and high write power.
Recent work proposed improving the write performance
through relaxing the retention time of STT-RAM cells.
The resultant volatile STT-RAM needs to be periodically
refreshed to prevent data loss. When volatile STT-RAM
is applied as the last-level cache (LLC) in chip
multiprocessor (CMP) systems, frequent refresh
operations could dissipate significant extra energy. In
addition, refresh operations could severely conflict
with normal read/write operations to degrade overall
system performance. Therefore, minimizing the
performance impact caused by refresh operations is
crucial for the adoption of volatile STT-RAM. In this
article, we propose Cache-Coherence-Enabled Adaptive
Refresh (CCear) to minimize the number of refresh
operations for volatile STT-RAM, adopted as the LLC for
CMP systems. Specifically, CCear interacts with cache
coherence protocol and cache management policy to
minimize the number of refresh operations on volatile
STT-RAM caches. Full-system simulation results show
that CCear performs close to an ideal refresh policy
with low overhead. Compared with state-of-the-art
refresh policies, CCear simultaneously improves the
system performance and reduces the energy consumption.
Moreover, the performance of CCear could be further
enhanced using small filter caches to accommodate the
not-refreshed private STT-RAM blocks.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2013:PBA,
author = "Xue-Xin Liu and Sheldon X.-D. Tan and Adolfo Adair
Palma-Rodriguez and Esteban Tlelo-Cuautle and Guoyong
Shi",
title = "Performance bound analysis of analog circuits in
frequency- and time-domain considering process
variations",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "6:1--6:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2534395",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we propose a new performance bound
analysis of analog circuits considering process
variations. We model the variations of component values
as intervals measured from tested chips and manufacture
processes. The new method first applies a graph-based
analysis approach to generate the symbolic transfer
function of a linear(ized) analog circuit. Then the
frequency response bounds (maximum and minimum) are
obtained by performing nonlinear constrained
optimization in which magnitude or phase of the
transfer function is the objective function to be
optimized subject to the ranges of process variational
parameters. The response bounds given by the
optimization-based method are very accurate and do not
have the over-conservativeness issues of existing
methods. Based on the frequency-domain bounds, we
further develop a method to calculate the time-domain
response bounds for any arbitrary input stimulus.
Experimental results from several analog benchmark
circuits show that the proposed method gives the
correct bounds verified by Monte Carlo analysis while
it delivers one order of magnitude speedup over Monte
Carlo for both frequency-domain and time-domain bound
analyses. We also show analog circuit yield analysis as
an application of the frequency-domain variational
bound analysis.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2013:OCC,
author = "Chien-Chih Huang and Chin-Long Wey and Jwu-E Chen and
Pei-Wen Luo",
title = "Optimal common-centroid-based unit capacitor
placements for yield enhancement of switched-capacitor
circuits",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "7:1--7:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2534394",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Yield is defined as the probability that the circuit
under consideration meets with the design specification
within the tolerance. Placement with higher correlation
coefficients has fewer mismatches and lower variation
of capacitor ratio, thus achieving higher yield
performance. This study presents a new optimization
criterion that quickly determines if the placement is
optimal. The optimization criterion leads to the
development of the concepts of C-entries and
partitioned subarrays which can significantly reduce
the searching space for finding the
optimal/near-optimal placements on a sufficiently large
array size.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2013:BGM,
author = "Irith Pomeranz",
title = "Built-in generation of multicycle functional broadside
tests with observation points",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "8:1--8:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2534396",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Functional broadside tests allow overtesting to be
avoided as part of a scheme that considers both test
generation and the analysis of output responses, by
ensuring that delay faults are detected under
functional operation conditions. Compared with
two-cycle tests, multicycle tests allow more faults to
be detected with each test, thus reducing the number of
tests that need to be applied. They also provide an
opportunity for nonfunctional electrical effects, which
are caused by switching between modes of operation, to
subside before the clock cycles where delay faults are
detected. Built-in test generation facilitates at-speed
testing and reduces the test data volume. Motivated by
these observations, this article describes the
modification of a built-in test generation method for
two-cycle functional broadside tests so as to generate
multicycle functional broadside tests. The size of the
hardware is not increased by the modification. The
article investigates the following issues related to
this method: (1) the effect of using multicycle tests
on the number of tests that need to be applied; (2)
fault simulation for tailoring the test generation
hardware to a circuit that takes into account, to
different extents, the need to allow nonfunctional
electrical effects to subside; (3) the insertion of
observation points in order to increase the transition
fault coverage.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Tong:2013:TCT,
author = "Jason G. Tong and Marc Boul{\'e} and Zeljko Zilic",
title = "Test compaction techniques for assertion-based test
generation",
journal = j-TODAES,
volume = "19",
number = "1",
pages = "9:1--9:??",
month = dec,
year = "2013",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2534397",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Dec 17 17:21:29 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Assertions are now widely used in verification as a
means to help convey designer intent and also to
simplify the detection of erroneous conditions by the
firing of assertions. With this expressive modeling
power, assertions could also be used for tasks such as
helping to assess test coverage and even as a source
for test generation. Our work deals with this last
aspect, namely, assertion-based test generation. In
this article, we present our compacted test generation
scheme based on assertions. Novel compaction techniques
are presented based on assertion clustering, test-path
overlap detection and parallel-path removal. Our
compaction approach is experimentally evaluated using
nearly 300 assertions to show the amount of reduction
that can be obtained in the size of the test sets. This
ultimately has a positive impact on verification time
in the quest for bug-free designs.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Tu:2014:PPP,
author = "Chia-Heng Tu and Hui-Hsin Hsu and Jen-Hao Chen and
Chun-Han Chen and Shih-Hao Hung",
title = "Performance and power profiling for emulated {Android}
systems",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "10:1--10:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566660",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Simulation is a common approach for assisting system
design and optimization. For system-wide optimization,
energy and computational resources are often the two
most critical issues. Monitoring the energy state of
each hardware component and measuring the time spent in
each state is needed for accurate energy and
performance prediction. For software optimization, it
is important to profile the energy and the time
consumed by each software construct in a realistic
operating environment with a proper workload. However,
the conventional approaches of simulation often fail to
produce satisfying data. First, building a
cycle-accurate simulation environment for a complex
system, such as an Android smartphone, is difficult and
can take a long time. Second, a slow simulation can
significantly alter the behavior of multithreaded,
I/O-intensive applications and can affect the accuracy
of profiles. Third, existing software-based profilers
generally do not work on simulators, which makes it
difficult for performance analysis of complicated
software, for example, Java applications executed by
the Dalvik VM in an Android system. To address these
aforementioned problems, we proposed and prototyped a
framework, called virtual performance analyzer (VPA).
VPA takes advantage of an existing emulator or virtual
machine monitor to reduce the complexity of building a
simulator. VPA allows the user to selectively and
incrementally integrate timing models and power models
into the emulator with our carefully designed
performance/power monitors, tracing facility, and
profiling tools to evaluate and analyze the emulated
system. The emulated system can perform at different
levels of speed to help verify if the profile data are
impacted by the emulation speed. Finally, VPA supports
existing software-based profiles and enables
non-intrusive tracing/profiling by minimizing the probe
effect. Our experimental results show that the VPA
framework allows users to quickly establish a
performance/power evaluation environment and gather
useful information to support system design and
software optimization for Android smartphones.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ganeshpure:2014:PDD,
author = "Kunal Ganeshpure and Sandip Kundu",
title = "Performance-driven dynamic thermal management of
{MPSoC} based on task rescheduling",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "11:1--11:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566661",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "High level of integration has led to the advent of
Multiprocessor System-on-Chip (MPSoC) which consists of
multiple processor cores and accelerators on the same
die. A MPSoC programming model is based on a task graph
where tasks are assigned to cores to maximize
performance. To address thermal hotspots in MPSoCs,
coarse-grain power management techniques based on
Dynamic Frequency Scaling (DFS) are widely used. DFS is
reactive in nature and has detrimental effects on
performance. We propose an alternative solution based
on dynamic task rescheduling where a temperature
prediction scheme is built into the scheduler. The
temperature look-ahead scheme is used for task
reassignment or delay insertion in scheduling. Since
temperature prediction and task assignment are done at
runtime, both must be simple and extremely fast. To
that end, we propose a heuristic solution based on a
limited branch-and-bound search and compare results
against an optimal Integer Linear Programming
(ILP)-based solution. The proposed approach is shown to
be superior to frequency scaling, and the resulting
schedule length is within 5\% to 10\% of the optimal
solution as obtained from ILP formulation.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Meyer:2014:CEL,
author = "Brett H. Meyer and Adam S. Hartman and Donald E.
Thomas",
title = "Cost-effective lifetime and yield optimization for
{NoC-based} {MPSoCs}",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "12:1--12:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2535575",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As manufacturing processes scale, designers are
increasingly dependent on techniques to mitigate
manufacturing defect and permanent failure. In embedded
systems-on-chip, system lifetime and yield can be
increased using slack---under-utilization in execution
and storage resources---so that when components are
defective, data and tasks can be remapped and
rescheduled. For any given system, the design space of
possible slack allocations is both large and complex,
consisting of every possible way to replace each
component in the initial system with another from the
component library. Based on the observation that useful
slack is often quantized, we have developed Critical
Quantity Slack Allocation (CQSA), an approach that
effectively and efficiently allocates execution and
storage slack to jointly optimize system yield and
cost. While exploring less than 1.4\% of the slack
allocation design space, our approach consistently
outperforms alternative slack allocation techniques to
find sets of designs within 1.4\% of the lifetime-cost
Pareto-optimal front. When applied to yield-cost
optimization, our approach again outperforms
alternative techniques, exploring less than 1.62\% of
the design space to find sets of designs within 4.27\%
of the yield-cost Pareto-optimal front. One advantage
of managing failure at the system level is that the
same techniques that improve lifetime often also
improve yield. As a result, with little modification,
CQSA is further able to perform effective joint
optimization of lifetime and yield, finding designs
within 1.6\% of the Pareto-optimal front.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2014:CRM,
author = "Jongeun Lee and Seongseok Seo and Jongkyung Paek and
Kiyoung Choi",
title = "Configurable range memory for effective data reuse on
programmable accelerators",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "13:1--13:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566662",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "While programmable accelerators such as
application-specific processors and reconfigurable
architectures can dramatically speed up
compute-intensive kernels of an application,
application performance can still be severely limited
by the communication between processors. To minimize
the communication overhead, a shared memory such as a
scratchpad memory may be employed between the main
processor and the accelerator coprocessor. However,
this setup poses a significant challenge to the main
processor, which now must manage data on the scratchpad
explicitly, resulting in superfluous data copying due
to the inflexibility of a scratchpad. In this article,
we present an enhancement of a scratchpad, Configurable
Range Memory (CRM), whose address range can be
reprogrammed to minimize unnecessary data copying
between processors and therefore promote data reuse on
the accelerator, and also present a software management
algorithm for the CRM. Our experimental results
involving detailed simulation of full multimedia
applications demonstrate that our CRM architecture can
reduce the communication overhead quite effectively,
reducing the kernel execution time by up to 28\% and
the application runtime by up to 12.8\%, in addition to
considerable system energy reduction, compared to the
conventional architecture based on a scratchpad.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hung:2014:AFD,
author = "Eddie Hung and Steven J. E. Wilton",
title = "Accelerating {FPGA} debug: Increasing visibility using
a runtime reconfigurable observation and triggering
network",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "14:1--14:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566668",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "FPGA technology is commonly used to prototype new
digital designs before entering fabrication. Whilst
these physical prototypes can operate many orders of
magnitude faster than through a logic simulator, a
fundamental limitation is their lack of on-chip
visibility when debugging. To counter this,
trace-buffer-based instrumentation can be installed
into the prototype, allowing designers to capture a
predetermined window of signal data during live
operation for offline analysis. However, instead of
requiring the designer to recompile their entire
circuit every time the window is modified, this article
proposes that an overlay network is constructed using
only spare FPGA routing multiplexers to connect all
circuit signals through to the trace instruments. Thus,
during debugging, designers would only need to
reconfigure this network instead of finding a new
place-and-route solution. Furthermore, we describe how
this network can deliver signals to both the trigger
and trace units of these instruments, which are
implemented simultaneously using dual-port RAMs. Our
results show that new network configurations connecting
any subset of signals to 80--90\% of the available RAM
capacity can be computed in less than 70 seconds, for a
100,000 LUT circuit, as many times as necessary. Our
tool---QuickTrace---is available for download.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Panerati:2014:CEM,
author = "Jacopo Panerati and Giovanni Beltrame",
title = "A comparative evaluation of multi-objective
exploration algorithms for high-level design",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "15:1--15:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566669",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents a detailed overview and the
experimental comparison of 15 multi-objective
design-space exploration (DSE) algorithms for
high-level design. These algorithms are collected from
recent literature and include heuristic, evolutionary,
and statistical methods. To provide a fair comparison,
the algorithms are classified according to the approach
used and examined against a large set of metrics. In
particular, the effectiveness of each algorithm was
evaluated for the optimization of a multiprocessor
platform, considering initial setup effort, rate of
convergence, scalability, and quality of the resulting
optimization. Our experiments are performed with
statistical rigor, using a set of very diverse
benchmark applications (a video converter, a parallel
compression algorithm, and a fast Fourier
transformation algorithm) to take a large spectrum of
realistic workloads into account. Our results provide
insights on the effort required to apply each algorithm
to a target design space, the number of simulations it
requires, its accuracy, and its precision. These
insights are used to draw guidelines for the choice of
DSE algorithms according to the type and size of design
space to be optimized.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2014:CPA,
author = "Seokhyun Lee and Kiyoung Choi",
title = "Critical-path-aware high-level synthesis with
distributed controller for fast timing closure",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "16:1--16:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566670",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Centralized controllers commonly used in high-level
synthesis often require long wires and cause high load
capacitance, and that is why critical paths typically
occur on paths from controllers to data registers
instead of paths from data registers to data registers.
However, conventional high-level synthesis has focused
on delays within a datapath, making it difficult to
solve the timing closure problem during physical
synthesis. This article presents hardware architecture
with a distributed controller, which makes the timing
closure problem much easier. A novel
critical-path-aware high-level synthesis flow is also
presented for synthesizing such hardware through
datapath partitioning, register binding, and controller
optimization. We explore the design space related to
the number of partitions, which is an important design
parameter for target architecture. According to our
experiments, the proposed approach reduces the critical
path delay excluding FUs by 29.3\% and that including
FUs by 10.0\%, with 2.2\% area overhead on average
compared to centralized controller architecture.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wei:2014:TSE,
author = "Yaoguang Wei and Cliff Sze and Natarajan Viswanathan
and Zhuo Li and Charles J. Alpert and Lakshmi Reddy and
Andrew D. Huber and Gustavo E. Tellez and Douglas
Keller and Sachin S. Sapatnekar",
title = "Techniques for scalable and effective routability
evaluation",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "17:1--17:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566663",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Routing congestion has become a critical layout
challenge in nanoscale circuits since it is a critical
factor in determining the routability of a design. An
unroutable design is not useful even though it closes
on all other design metrics. Fast design closure can
only be achieved by accurately evaluating whether a
design is routable or not early in the design cycle.
Lately, it has become common to use a ``light mode''
version of a global router to quickly evaluate the
routability of a given placement. This approach suffers
from three weaknesses: (i) it does not adequately model
local routing resources, which can cause incorrect
routability predictions that are only detected late,
during detailed routing; (ii) the congestion maps
obtained by it tend to have isolated hotspots
surrounded by noncongested spots, called ``noisy
hotspots'', which further affects the accuracy in
routability evaluation; and (iii) the metrics used to
represent congestion may yield numbers that do not
provide sufficient intuition to the designer, and
moreover, they may often fail to predict the
routability accurately. This article presents solutions
to these issues. First, we propose three approaches to
model local routing resources. Second, we propose a
smoothing technique to reduce the number of noisy
hotspots and obtain a more accurate routability
evaluation result. Finally, we develop a new metric
which represents congestion maps with higher fidelity.
We apply the proposed techniques to several industrial
circuits and demonstrate that one can better predict
and evaluate design routability and that congestion
mitigation tools can perform much better to improve the
design routability.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2014:LPS,
author = "Irith Pomeranz",
title = "Low-power skewed-load tests based on functional
broadside tests",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "18:1--18:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566664",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article studies the generation of low-power
skewed-load tests such that the signal transitions (and
line values) they create during their fast functional
clock cycles match those of functional broadside tests.
Functional broadside tests create functional operation
conditions during their fast functional clock cycles.
As a result, the signal transitions that occur during
these clock cycles can also occur during functional
operation. The procedure described in this article
matches these signal-transitions on a line-by-line
basis when generating low-power skewed-load tests. The
procedure accepts a functional broadside test set for
transition faults. In one of its basic steps, the
procedure modifies a functional broadside test into a
skewed-load test. This allows it to retain many of the
signal transitions (and line values) of the functional
broadside test in the skewed-load test. Experimental
results for benchmark circuits demonstrate the extent
to which it is possible to match the signal-transitions
of skewed-load tests with those of functional broadside
tests while achieving the high transition fault
coverage that is typical of skewed-load tests.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2014:DTM,
author = "Irith Pomeranz",
title = "Design-for-testability for multi-cycle broadside tests
by holding of state variables",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "19:1--19:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566665",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article describes a design-for-testability
approach for increasing the transition fault coverage
of multi-cycle broadside tests. Earlier methods
addressed two-cycle tests. The importance of
multi-cycle tests results from the ability to produce
more compact test sets than possible with two-cycle
tests, from the fact that when multi-cycle tests are
applied at-speed, they can detect defects that are not
detected by two-cycle tests and from their ability to
avoid overtesting of delay faults. The approach
described in this article is based on holding the
values of selected state variables constant during the
functional clock cycles of a multi-cycle broadside
test. This allows new tests to be produced, which are
different from broadside tests, without relying on
nonfunctional toggling of state variables as in earlier
methods for two-cycle tests. Experimental results show
significant improvements in transition fault coverage
using a fixed set of hold configurations for two types
of multi-cycle broadside test sets: (1) test sets that
are stored and applied from an external tester, and (2)
functional broadside test sets that are generated using
on-chip hardware.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Biswas:2014:RTC,
author = "Sounil Biswas and Hongfei Wang and R. D. (Shawn)
Blanton",
title = "Reducing test cost of integrated, heterogeneous
systems using pass-fail test data analysis",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "20:1--20:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2566666",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Stringent quality requirements for integrated,
heterogeneous systems have led designers and test
engineers to mandate large sets of tests to be applied
to these systems, which, in turn, have resulted in
increased test cost. However, many of these tests are
unnecessary (i.e., redundant), since their outcomes can
be reliably predicted using results from other applied
tests. A methodology for identifying the redundant
tests of an integrated, heterogeneous system that has
only binary pass-fail test data is described. This
methodology uses decision trees, Boolean minimization,
and satisfiability as core components. Feasibility is
empirically demonstrated using test data from two
commercially fabricated systems, namely, a high-speed
serializer/deserializer (HSS) and a phase-locked loop
(PLL). Our analysis of test data from {$>$} 38,000 HSS
and {$>$} 22,000 PLL circuits show that 14 out of 40
HSS tests and 11 out of 36 PLL tests are redundant.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2014:BBL,
author = "Da-Wei Chang and Hsin-Hung Chen and Dau-Jieu Yang and
Hsung-Pin Chang",
title = "{BLAS}: Block-level adaptive striping for solid-state
drives",
journal = j-TODAES,
volume = "19",
number = "2",
pages = "21:1--21:??",
month = mar,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2555616",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 21 18:21:14 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Increasing the degree of parallelism and reducing the
overhead of garbage collection (GC overhead) are the
two keys to enhancing the performance of solid-state
drives (SSDs). SSDs employ multichannel architectures,
and a data placement scheme in an SSD determines how
the data are striped to the channels. Without
considering the data access pattern, existing fixed and
device-level data placement schemes may have either
high GC overhead or poor I/O parallelism, resulting in
degraded performance. In this article, an adaptive
block-level data placement scheme called BLAS is
proposed to maximize the I/O parallelism while
simultaneously minimizing the GC overhead. In contrast
to existing device-level schemes, BLAS allows different
data placement policies for blocks with different
access patterns. Pages in read-intensive blocks are
scattered over various channels to maximize the degree
of read parallelism, while pages in each of the
remaining blocks are attempted to be gathered in the
same physical block to minimize the GC overhead.
Moreover, BLAS allows the placement policy for a
logical block to be changed dynamically according to
the access pattern changes of that block. Finally, a
parallelism-aware write buffer management approach is
adopted in BLAS to maximize the degree of write
parallelism. Performance results show that BLAS yields
a significant improvement in the SSD response time when
compared to existing device-level schemes. In
particular, BLAS outperforms device-level page striping
and device-level block striping by factors of up to
8.75 and 7.41, respectively. Moreover, BLAS achieves
low GC overhead and is effective in adapting to
workload changes.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bathen:2014:STS,
author = "Luis Angel D. Bathen and Nikil D. Dutt",
title = "{SPMCloud}: Towards the Single-Chip Embedded
{ScratchPad} Memory-Based Storage Cloud",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "22:1--22:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611755",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The era of cloud computing on-a-chip is enabled by the
aggressive move towards many-core platforms and the
rapid adoption of Network-on-Chips. As a result, there
is a need for large-scale distributed on-chip shared
memories that are reliable, low power, and seamlessly
manageable. In this work, we propose SPMCloud, a novel
scratchpad-memory-based cloud-inspired volatile storage
subsystem designed to meet the needs of
future-generation many-core platforms. SPMCloud is
composed of several concepts, including: (1) a highly
scalable data-center-like memory subsystem that
exploits two enterprise-network-inspired memory
configurations, namely, embedded Network Attached
Storage (eNAS) and embedded Storage Area Network
(eSAN), and (2) on-demand allocation of reliable memory
space through memory virtualization and the use of
embedded RAIDs. Our experimental results on
Mediabench/CHStone benchmarks show that the SPMCloud's
fully distributed reliable memory subsystems can
achieve 48\% energy savings and 70\% latency reduction
on average over state-of-the-art NoC memory reliability
techniques. We then evaluate the scalability of the
SPMCloud and compare it with traditional SPM allocation
policies. The SPMCloud's dynamic allocator outperforms
the best competition by an average 60\% (eNAS) and 46\%
(eSAN) when the platform runs at 250 MHz and by an
average 80\% (eNAS) and 40\% when running at 1 GHz.
Moreover, the SPMCloud achieves an average 83\% energy
savings across all configurations (number of cores)
with respect to the best competitors when running at
250 MHz and 1 GHz. We then studied the SPM hit ratio
across the various allocation policies discussed in
this article and showed that on average the SPMCloud's
priority-driven dynamic allocation policy achieves
93.5\% SPM hit ratio, 0.6\% higher hit ratio than the
closest allocation policy. We then showed that the eNAS
and eSAN achieve an average of 67.9\% and 29\%
reduction in execution time, respectively, over the
best competitor. Similarly, the eNAS and eSAN achieve
an average of 82.7\% and 82.3\% energy savings,
respectively, over the best competitor. Furthermore, we
evaluated the scalability of the SPMCloud and its
performance/energy efficiency when providing support
for some of the heavier E-RAID levels, and showed that
the eNAS/eSAN configurations with SECDED achieve an
average of 51.5\% and 34.9\% reduction in execution
time, respectively, over the best competitor with
SECDED. Similarly, the eNAS/eSAN configurations with
E-RAID Level 1, + SECDED achieve an average of 82.3\%
and 75.6\% energy savings, respectively, over the best
competitor.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Rosales:2014:MHA,
author = "Rafael Rosales and Michael Glass and J{\"u}rgen Teich
and Bo Wang and Yang Xu and Ralph Hasholzner",
title = "{MAESTRO} --- Holistic Actor-Oriented Modeling of
Nonfunctional Properties and Firmware Behavior for
{MPSoCs}",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "23:1--23:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2594481",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Modeling and evaluating nonfunctional properties such
as performance, power, and reliability of embedded
systems are tasks of utmost importance. In this
article, we introduce MAESTRO, a methodology for the
modeling and evaluation of nonfunctional properties and
embedded firmware of MPSoC architecture components at
the Electronic System Level (ESL). In contrast to
existing design flows that provide predefined
performance models, MAESTRO defines a flexible approach
that allows to define virtual prototypes that can be
easily customized and extended to evaluate multiple
nonfunctional properties of interest at different
levels of abstraction. In MAESTRO, a design is composed
purely from actor-oriented models. This enables typical
ESL features such as automatic design space exploration
and synthesizability of HW and SW components, typically
missing in very general design flows. Unique to MAESTRO
is the separation and coordination of the interaction
between application functionality, firmware, and
performance models for the evaluation of nonfunctional
properties, and their complex interactions within a
single Model-of-Computation (MoC). The main advantages
of MAESTRO are: (I) Extensible modeling of
interdependent nonfunctional properties of
heterogeneous MPSoC components; (II) high flexibility
to investigate the appropriate trade-off between
modeling effort and accuracy of nonfunctional property
evaluators; (III) a holistic approach for modeling
application functionality as well as firmware affecting
the evaluation of nonfunctional properties. Regarding
(II), we present a mobile baseband processor platform
use-case, executing a GSM paging application. To
demonstrate (I) and (III), we present the modeling of a
complex ESL processor virtual prototype, running a soft
real-time application and equipped with both a power
and reliability manager.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2014:ICP,
author = "Libo Huang and Zhiying Wang and Nong Xiao and Yongwen
Wang and Qiang Dou",
title = "Integrated Coherence Prediction: Towards Efficient
Cache Coherence on {NoC}-Based Multicore
Architectures",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "24:1--24:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611756",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Multicore architectures with Network-on-Chips (NoCs)
have been widely recognized as the de facto design for
the efficient utilization of the continuously
increasing density of transistors on a chip. A key
challenge in designing such an NoC-based multicore
processor is maintaining cache coherence in an
efficient manner. Directory-based protocols avoid the
bandwidth overhead of snoop-based protocols, therefore
scaling to a large number of cores. However,
conventional directory structures add significant
indirection delay to cache-to-cache accesses in larger
multicore processor. In this article we propose a novel
hardware coherence technique, called integrated
coherence prediction (ICP). This approach adopts a
prediction technique for managing shared data to reduce
or eliminate the cache-to-cache delay in coherence
accesses. ICP has two unique features that differ from
previous coherence prediction techniques. First, ICP
introduces a new integrated prediction scheme that
combines two kinds of predictors: owner predictor,
which predicts the data writers and avoids the
indirection through directory, and data predictor,
which predicts the access address and prefetches data
from remote nodes directly. Second, ICP uses a request
replication method to reduce the negative effect of
wrong owner prediction operations, thus facilitating
overall performance improvement. We present the design
and implementation details of the ICP approach. Using
detailed full-system simulations, we conclude that the
ICP provides a cost-effective solution for designing
high-performance multicore processors.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2014:GCM,
author = "Po-Chun Huang and Yuan-Hao Chang and Kam-Yiu Lam and
Jian-Tao Wang and Chien-Chin Huang",
title = "Garbage Collection for Multiversion Index in
Flash-Based Embedded Databases",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "25:1--25:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611757",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Recently, flash-based embedded databases have gained
their momentum in various control and monitoring
systems, such as cyber-physical systems (CPSes). To
support the functionality to access the historical
data, a multiversion index is adopted to simultaneously
maintain multiple versions of data items, as well as
their index information. However, maintaining a
multiversion index on flash memory incurs considerable
performance overheads on garbage collection, which is
to reclaim the spaces occupied by the outdated/invalid
data items and their index information on flash memory.
In this work, we propose an efficient garbage
collection strategy to solve the garbage collection
issues of flash-based multiversion databases. In
particular, a version-tracking method is proposed to
accelerate the performance on the process on
identifying/reclaiming the space of invalid data and
their indexes, and a pre-summary method is also
designed to solve the cascading update problem that is
caused by the write-once nature of flash memory and is
worsened when more versions refer to the same data
item. The capability of the proposed strategy is then
verified by analytical and experimental studies.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lim:2014:PMG,
author = "Jieun Lim and Nagesh B. Lakshminarayana and Hyesoon
Kim and William Song and Sudhakar Yalamanchili and
Wonyong Sung",
title = "Power Modeling for {GPU} Architectures Using {McPAT}",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "26:1--26:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611758",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Graphics Processing Units (GPUs) are very popular for
both graphics and general-purpose applications. Since
GPUs operate many processing units and manage multiple
levels of memory hierarchy, they consume a significant
amount of power. Although several power models for CPUs
are available, the power consumption of GPUs has not
been studied much yet. In this article we develop a new
power model for GPUs by utilizing McPAT, a CPU power
tool. We generate initial power model data from McPAT
with a detailed GPU configuration, and then adjust the
models by comparing them with empirical data. We use
the NVIDIA's Fermi architecture for building the power
model, and our model estimates the GPU power
consumption with an average error of 7.7\% and 12.8\%
for the microbenchmarks and Merge benchmarks,
respectively.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2014:DCC,
author = "Chia-Wei Lee and Sun-Yuan Hsieh",
title = "Diagnosability of Component-Composition Graphs in the
{MM*} Model",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "27:1--27:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611759",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Diagnosability is an important metric for measuring
the reliability of multiprocessor systems. This article
adopts the MM* model and outlines the common properties
of a wide class of interconnection networks, called
component-composition graphs (CCGs), to determine their
diagnosability by using their obtained properties. By
applying the results to multiprocessor systems, the
diagnosability of hypercube-like networks (including
hypercubes, crossed cubes, M{\"o}bius cubes, twisted
cubes, locally twisted cubes, generalized twisted
cubes, and recursive circulants), star graphs, pancake
graphs, bubble-sort graphs, and burnt pancake graphs,
all of which belong to the class of CCGs, can also be
computed.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Erb:2014:ELF,
author = "Dominik Erb and Michael A. Kochte and Matthias Sauer
and Stefan Hillebrecht and Tobias Schubert and
Hans-Joachim Wunderlich and Bernd Becker",
title = "Exact Logic and Fault Simulation in Presence of
Unknowns",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "28:1--28:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611760",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Logic and fault simulation are essential techniques in
electronic design automation. The accuracy of standard
simulation algorithms is compromised by unknown or
X-values. This results in a pessimistic overestimation
of X-valued signals in the circuit and a pessimistic
underestimation of fault coverage. This work proposes
efficient algorithms for combinational and sequential
logic as well as for stuck-at and transition-delay
fault simulation that are free of any simulation
pessimism in presence of unknowns. The SAT-based
algorithms exactly classify all signal states. During
fault simulation, each fault is accurately classified
as either undetected, definitely detected, or possibly
detected. The pessimism with respect to unknowns
present in classic algorithms is thoroughly
investigated in the experimental results on benchmark
circuits. The applicability of the proposed algorithms
is demonstrated on larger industrial circuits. The
results show that, by accurate analysis, the number of
detected faults can be significantly increased without
increasing the test-set size.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yan:2014:EFG,
author = "Jackey Z. Yan and Natarajan Viswanathan and Chris
Chu",
title = "An Effective Floorplan-Guided Placement Algorithm for
Large-Scale Mixed-Size Designs",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "29:1--29:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611761",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article we propose an effective algorithm flow
to handle modern large-scale mixed-size placement, both
with and without geometry constraints. The basic idea
is to use floorplanning to guide the placement of
objects at the global level. The flow consists of four
steps: (1) The objects in the original netlist are
clustered into blocks; (2) floorplanning is performed
on the blocks; (3) the blocks are shifted within the
chip region to further optimize the wirelength; (4)
with large macro-locations fixed, incremental placement
is applied to place the remaining objects. There are
several advantages to handling placement at the global
level with a floorplanning technique. First, the
problem size can be significantly reduced. Second,
exact Half-Perimeter WireLength (HPWL) can be
minimized. Third, better object distribution can be
achieved so that legalization only needs to handle
minor overlaps among small objects in a block. Fourth,
macro-rotation and various geometry constraints can be
handled. To demonstrate the effectiveness of this new
flow, we implement a high-quality and efficient
floorplan-guided placer called FLOP. We also construct
the Modern Mixed-Size (MMS) placement benchmarks that
can effectively represent the complexities of modern
mixed-size designs and the challenges faced by modern
mixed-size placers. Compared with most state-of-the-art
mixed-size placers and leading macroplacers,
experimental results show that FLOP achieves the best
HPWL and easily obtains legal solutions on all circuits
with all geometry constraints satisfied.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kang:2014:IRA,
author = "Minseok Kang and Taewhan Kim",
title = "Integrated Resource Allocation and Binding in Clock
Mesh Synthesis",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "30:1--30:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611762",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The clock distribution network in a synchronous
digital circuit delivers a clock signal to every
storage element, that is, clock sink in the circuit.
However, since the continued technology scaling
increases PVT (process-voltage-temperature) variation,
the increase of clock-skew variation is highly likely
to cause performance degradation or system failure at
runtime. Recently, to mitigate the clock-skew
variation, many researchers have taken a profound
interest in the clock mesh network. However, though the
structure of the clock mesh network is excellent in
tolerating timing variations, it demands significantly
high power consumption due to the use of excessive mesh
wire and buffer resources. Thus, optimizing the
resources required in the mesh clock synthesis while
maintaining the variation tolerance is crucially
important. The three major tasks that greatly affect
the cost of the resulting clock mesh are: (1) mesh
segment allocation, (2) mesh buffer allocation and
sizing, and (3) clock sink binding to mesh segments.
Previous clock mesh optimization approaches solve the
three tasks sequentially, one by one at a time, to
manage the runtime complexity of the tasks at the
expense of losing the quality of results. However,
since the three tasks are tightly interrelated,
simultaneously optimizing all three tasks is essential,
if the runtime is ever permitted, to synthesize an
economical clock mesh network. In this work, we propose
an approach that is able to tackle the problem in an
integrated fashion by combining the three tasks into an
iterative framework of incremental updates and solving
them simultaneously to find a globally optimal
allocation of mesh resources while taking into account
the clock-skew tolerance constraints. The core parts of
this work are a precise analysis on the relation among
the resource optimization tasks and an establishment of
a mechanism for effective and efficient integration of
the tasks. In particular, to handle the runtime
problem, we propose a set of speedup techniques, that
is, modeling the RC circuit for eliminating redundant
matrix multiplications, exploiting a sliding-window
scheme, and quickly estimating the buffer sizing
effect, which are fitted into our context of fast
clock-skew estimation in mesh resource optimization as
well as an invention of early decision policies.
Through extensive experiments with benchmark circuits,
it is shown that our proposed clock mesh synthesizer is
able to reduce the worst-case clock skew, total mesh
wirelength, total size of mesh driving buffers, and
total clock mesh power consumption including
short-circuit power by 25.0\%, 13.2\%, 10.9\%, and
11.0\% on average compared to that produced by the
best-known clock mesh synthesis method (MeshWorks),
respectively.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Boghrati:2014:IAP,
author = "Baktash Boghrati and Sachin S. Sapatnekar",
title = "Incremental Analysis of Power Grids Using Backward
Random Walks",
journal = j-TODAES,
volume = "19",
number = "3",
pages = "31:1--31:??",
month = jun,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2611763",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Jun 21 07:58:42 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power grid design and analysis is a critical part of
modern VLSI chip design and demands tools for accurate
modeling and efficient analysis. The process of power
grid design is inherently iterative, during which
numerous small changes are made to an initial design,
either to enhance the design or to fix design
constraint violations. Due to the large sizes of power
grids in modern chips, updating the solution for these
perturbations can be a computationally intensive task.
In this work, we first introduce an accurate modeling
methodology for power grids that, contrary to
conventional models, can result in asymmetrical
equations. Next, we propose an efficient and accurate
incremental solver that utilizes the backward random
walks to identify the region of influence of the
perturbation. The solution of the network is then
updated for this significantly smaller region only. The
proposed algorithm is capable of handling both
symmetrical and asymmetrical power grid equations.
Moreover, it can handle consecutive perturbations
without any degradation in the quality of the solution.
Experimental results show speedups of up to 13$ \times
$ for our incremental solver, as compared to a full
resolve of the power grid.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Schneider:2014:QNE,
author = "Reinhard Schneider and Dip Goswami and Samarjit
Chakraborty and Unmesh Bordoloi and Petru Eles and Zebo
Peng",
title = "Quantifying Notions of Extensibility in {FlexRay}
Schedule Synthesis",
journal = j-TODAES,
volume = "19",
number = "4",
pages = "32:1--32:??",
month = aug,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2647954",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Aug 25 19:03:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "FlexRay has now become a well-established in-vehicle
communication bus at most original equipment
manufacturers (OEMs) such as BMW, Audi, and GM. Given
the increasing cost of verification and the high degree
of crosslinking between components in automotive
architectures, an incremental design process is
commonly followed. In order to incorporate
FlexRay-based designs in such a process, the resulting
schedules must be extensible, that is: (i) when
messages are added in later iterations, they must
preserve deadline guarantees of already scheduled
messages, and (ii) they must accommodate as many new
messages as possible without changes to existing
schedules. Apart from extensible scheduling having not
received much attention so far, traditional metrics
used for quantifying them cannot be trivially adapted
to FlexRay schedules. This is because they do not
exploit specific properties of the FlexRay protocol. In
this article we, for the first time, introduce new
notions of extensibility for FlexRay that capture all
the protocol-specific properties. In particular, we
focus on the dynamic segment of FlexRay and we present
a number of metrics to quantify extensible schedules.
Based on the introduced metrics, we propose strategies
to synthesize extensible schedules and compare the
results of different scheduling algorithms. We
demonstrate the applicability of the results with
industrial-size case studies and also show that the
proposed metrics may also be visually represented,
thereby allowing for easy interpretation.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pan:2014:SPM,
author = "Gung-Yu Pan and Jing-Yang Jou and Bo-Cheng Lai",
title = "Scalable Power Management Using Multilevel
Reinforcement Learning for Multiprocessors",
journal = j-TODAES,
volume = "19",
number = "4",
pages = "33:1--33:??",
month = aug,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2629486",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Aug 25 19:03:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Dynamic power management has become an imperative
design factor to attain the energy efficiency in modern
systems. Among various power management schemes,
learning-based policies that are adaptive to different
environments and applications have demonstrated
superior performance to other approaches. However, they
suffer the scalability problem for multiprocessors due
to the increasing number of cores in a system. In this
article, we propose a scalable and effective online
policy called MultiLevel Reinforcement Learning (MLRL).
By exploiting the hierarchical paradigm, the time
complexity of MLRL is $ O(n \lg n) $ for $n$ cores and the
convergence rate is greatly raised by compressing
redundant searching space. Some advanced techniques,
such as the function approximation and the action
selection scheme, are included to enhance the
generality and stability of the proposed policy. By
simulating on the SPLASH-2 benchmarks, MLRL runs 53\%
faster and outperforms the state-of-the-art work with
13.6\% energy saving and 2.7\% latency penalty on
average. The generality and the scalability of MLRL are
also validated through extensive simulations.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yang:2014:WLL,
author = "Yoon Seok Yang and Reeshav Kumar and Gwan Choi and
Paul V. Gratz",
title = "{WaveSync}: Low-Latency Source-Synchronous Bypass
Network-on-Chip Architecture",
journal = j-TODAES,
volume = "19",
number = "4",
pages = "34:1--34:??",
month = aug,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2647950",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Aug 25 19:03:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "WaveSync is a network-on-chip architecture for a
globally asynchronous locally-synchronous (GALS)
design. The WaveSync design facilitates low-latency
communication leveraging the source-synchronous clock
sent along with the data to time components in the
datapath of a downstream router, reducing the number of
synchronizations needed. WaveSync accomplishes this by
partitioning the router components at each node into
different clock domains, each synchronized with one of
the orthogonal incoming source-synchronous clocks in a
GALS 2D mesh network. The data and clock subsequently
propagate through each node/router synchronously until
the destination is reached, regardless of the number of
hops this may take. As long as the data travels in the
path of clock propagation and no congestion is
encountered, it will be propagated without latching as
if in a long combinatorial path, with both the clock
and the data accruing delay at the same rate. The
result is that the need for synchronization between the
mesochronous nodes and/or the asynchronous control
associated with the typical GALS network is completely
eliminated. To further reduce the latency overhead of
synchronization, for those occasions when
synchronization is still required (when a flit takes a
turn or arrives at the destination), we propose a novel
less-than-one-cycle synchronizer. The proposed WaveSync
network outperforms conventional GALS networks by
87--90\% in average latency, synthesized using a 45nm
CMOS library.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jose:2014:IAH,
author = "John Jose and Madhu Mutyam",
title = "Implementation and Analysis of History-Based Output
Channel Selection Strategies for Adaptive Routers in
Mesh {NoCs}",
journal = j-TODAES,
volume = "19",
number = "4",
pages = "35:1--35:??",
month = aug,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2647952",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Aug 25 19:03:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The efficiency and effectiveness of an adaptive router
in an NoC-based multicore system is evaluated by the
performance it achieves under varying inter-core
communication traffic. A well-designed selection
strategy plays an important role in an adaptive router
to act upon dynamic traffic variations. The
effectiveness of a selection strategy depends on what
metric is used to represent congestion, how precisely
this metric captures the actual congestion, and how
much cost is involved in capturing the congestion on a
real-time scale. Congestion is formed over a period of
time due to cumulative and chain reaction effects. We
propose novel history-based selection strategies that
could be used with any adaptive, deadlock-free, minimal
routing in mesh NoCs. Buffer occupancy time and rate of
flit flow across reachable ports of neighboring routers
in the recent past are captured, propagated, and
maintained in a cost-effective way to compute the
selection metric. Experimental results on real and
synthetic workloads show that our proposed selection
strategies significantly outperform state-of-the-art
techniques.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Tsai:2014:PAE,
author = "Kun-Lin Tsai and Hao-Tse Chen and Yo-An Lin",
title = "Power and Area Efficiency {NoC} Router Design for
Application-Specific {SoC} by Using Buffer Merging and
Resource Sharing",
journal = j-TODAES,
volume = "19",
number = "4",
pages = "36:1--36:??",
month = aug,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2633604",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Aug 25 19:03:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Network-on-Chip (NoC) is an efficient on-chip
communication architecture specifically for
System-on-a-Chip (SoC) design. However, the input
buffers of a NoC router often take a significant
portion of the silicon area and power consumption.
Besides, the performance of a NoC is also greatly
affected by the buffer size. In this article, a static
buffer merging and resource sharing method is proposed
for the application-specific SoC minimizing the NoC
buffer. When given an application-specific task graph
and the dataflow distribution, the proposed method
statically merges rarely used buffers and generates the
suitable number of input buffers for each router at
design timely. The merged buffer is shared by several
input directions. The experimental result shows that
the buffer can be utilized more effectively after the
resource sharing. Based on the synthesized design with
TSMC 90nm technology, the proposed method reduces an
average of 42.23\% area and 35.13\% power while
providing similar performance.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hatami:2014:MSN,
author = "Nadereh Hatami and Rafal Baranowski and Paolo Prinetto
and Hans-Joachim Wunderlich",
title = "Multilevel Simulation of Nonfunctional Properties by
Piecewise Evaluation",
journal = j-TODAES,
volume = "19",
number = "4",
pages = "37:1--37:??",
month = aug,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2647955",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Aug 25 19:03:51 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As the technology shrinks, nonfunctional properties
(NFPs) such as reliability, vulnerability, power
consumption, or heat dissipation become as important as
system functionality. As NFPs often influence each
other, depend on the application and workload of a
system, and exhibit nonlinear behavior, NFP simulation
over long periods of system operation is
computationally expensive, if feasible at all. This
article presents a piecewise evaluation method for
efficient NFP simulation. Simulation time is divided
into intervals called evaluation windows, within which
the NFP models are partially linearized. High-speed
functional system simulation is achieved by parallel
execution of models at different levels of abstraction.
A trade-off between simulation speed and accuracy is
met by adjusting the size of the evaluation window. As
an example, the piecewise evaluation technique is
applied to analyze aging caused by two mechanisms,
namely Negative Bias Temperature Instability (NBTI) and
Hot Carrier Injection (HCI), in order to identify
reliability hotspots. Experiments show that the
proposed technique yields considerable simulation
speedup at a marginal loss of accuracy.",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ravi:2014:HLT,
  author          = {Srivaths Ravi and Michael Joseph},
  title           = {High-Level Test Synthesis: a Survey from Synthesis
                     Process Flow Perspective},
  journal         = j-TODAES,
  volume          = {19},
  number          = {4},
  pages           = {38:1--38:??},
  month           = aug,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2627754},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Mon Aug 25 19:03:51 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {High-level test synthesis is a special class of
                     high-level synthesis having testability as one of the
                     important components. This article presents a detailed
                     survey on recent developments in high-level test
                     synthesis from a synthesis process flow perspective. It
                     also presents a survey on controller synthesis
                     techniques for testability.},
  acknowledgement = ack-nhfb,
  articleno       = {38},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Juan:2014:SPT,
  author          = {Da-Cheng Juan and Siddharth Garg and Diana
                     Marculescu},
  title           = {Statistical Peak Temperature Prediction and Thermal
                     Yield Improvement for {$3$D} Chip Multiprocessors},
  journal         = j-TODAES,
  volume          = {19},
  number          = {4},
  pages           = {39:1--39:??},
  month           = aug,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2633606},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Mon Aug 25 19:03:51 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Thermal issues have become critical roadblocks for
                     achieving highly reliable three-dimensional (3D)
                     integrated circuits (ICs). The presence of process
                     variations further exacerbates these problems. In this
                     article, we propose techniques for the efficient
                     evaluation and mitigation of the impact of leakage
                     power variations on the temperature profile of 3D Chip
                     Multiprocessors (CMPs). Experimental results
                     demonstrate that, due to the impact of process
                     variations, a 4-tier 3D implementation can be more than
                     40$^\circ$C hotter and 23\% leakier than its 2D
                     counterpart.
                     To determine the maximum temperature of each fabricated
                     3D IC, we propose an accurate learning-based model for
                     peak temperature prediction. Based on the learning
                     model, we then propose two post-fabrication techniques
                     to increase the thermal yield of 3D CMPs: (1) tier
                     restacking and (2) thermally-aware die matching.
                     Experimental results show that: (1) the proposed
                     prediction model achieves more than 98\% accuracy, and
                     (2) the proposed thermally-aware, post-fabrication
                     optimization techniques significantly improve the
                     thermal yield from only 51\% to 99\% for 3D CMPs.},
  acknowledgement = ack-nhfb,
  articleno       = {39},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Livramento:2014:HTD,
  author          = {Vinicius S. Livramento and Chrystian Guth and Jos{\'e}
                     Lu{\'\i}s G{\"u}ntzel and Marcelo O. Johann},
  title           = {A Hybrid Technique for Discrete Gate Sizing Based on
                     {Lagrangian} Relaxation},
  journal         = j-TODAES,
  volume          = {19},
  number          = {4},
  pages           = {40:1--40:??},
  month           = aug,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2647956},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Mon Aug 25 19:03:51 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Discrete gate sizing has attracted a lot of attention
                     recently as the EDA industry faces the challenge of
                     optimizing large standard cell-based circuits. The
                     discrete nature of the problem, along with complex
                     timing models, stringent design constraints, and
                     ever-increasing circuit sizes, make the problem very
                     difficult to tackle. Lagrangian Relaxation (LR) is an
                     effective technique to handle complex constrained
                     optimization problems and therefore has been
                     successfully applied to solve the gate sizing problem.
                     This article proposes an improved Lagrangian relaxation
                     formulation for discrete gate sizing that relaxes
                     timing, maximum gate input slew, and maximum gate
                     output capacitance constraints. Based on such
                     formulation, we propose a hybrid technique composed of
                     three steps. First, a topological greedy heuristic
                     solves the LR formulation. Such a heuristic is applied
                     assuming a slightly increased target clock period
                     (backoff factor) to better explore the solution space.
                     Second, a delay recovery heuristic reestablishes the
                     original target clock with small power overhead. Third,
                     a power recovery heuristic explores the remaining
                     slacks to further reduce power. Experiments on the ISPD
                     2012 Contest benchmarks show that our hybrid technique
                     provides less leakage power than the state-of-the-art
                     work for every circuit from the ISPD 2012 Contest
                     infrastructure, achieving up to 24\% less leakage. In
                     addition, our technique achieves a much better
                     compromise between leakage reduction and runtime,
                     obtaining, on average, 9\% less leakage power while
                     running 8.8 times faster.},
  acknowledgement = ack-nhfb,
  articleno       = {40},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Ho:2014:USS,
  author          = {Yenpo Ho and Garng M. Huang and Peng Li},
  title           = {Understanding {SRAM} Stability via Bifurcation
                     Analysis: Analytical Models and Scaling Trends},
  journal         = j-TODAES,
  volume          = {19},
  number          = {4},
  pages           = {41:1--41:??},
  month           = aug,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2647957},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Mon Aug 25 19:03:51 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In the past decades, aggressive scaling of transistor
                     feature size has been a primary force driving higher
                     Static Random Access Memory (SRAM) integration density.
                     Due to technology scaling, nanometer SRAM designs
                     become increasingly vulnerable to stability challenges.
                     The traditional way of analyzing stability is through
                     the use of Static Noise Margins (SNMs). SNMs are not
                     capable of capturing the key nonlinear dynamics
                     associated with memory operations, leading to imprecise
                     characterization of stability. This work rigorously
                     develops dynamic stability concepts and, more
                     importantly, captures them in physically based
                     analytical models. By leveraging nonlinear stability
                     theory, we develop analytical models that characterize
                     the minimum required amplitude and duration of injected
                     current noises that can flip the SRAM state. These
                     models, which are parameterized in key design,
                     technology, and operating condition parameters, provide
                     important design insights and offer a basis for
                     predicting scaling trends of SRAM dynamic stability.},
  acknowledgement = ack-nhfb,
  articleno       = {41},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Chang:2014:EBT,
  author          = {Naehyuck Chang and David Z. Pan and Yuan Xie},
  title           = {Editorial: {{\booktitle{ACM Transactions on Design
                     Automation of Electronics Systems}}} and Beyond},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {1:1--1:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2676865},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {1},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Hu:2014:GLI,
  author          = {Wei Hu and Dejun Mu and Jason Oberg and Baolei Mao and
                     Mohit Tiwari and Timothy Sherwood and Ryan Kastner},
  title           = {Gate-Level Information Flow Tracking for Security
                     Lattices},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {2:1--2:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2676548},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {High-assurance systems found in safety-critical
                     infrastructures are facing steadily increasing cyber
                     threats. These critical systems require rigorous
                     guarantees in information flow security to prevent
                     confidential information from leaking to an
                     unclassified domain and the root of trust from being
                     violated by an untrusted party. To enforce bit-tight
                     information flow control, gate-level information flow
                     tracking (GLIFT) has recently been proposed to
                     precisely measure and manage all digital information
                     flows in the underlying hardware, including implicit
                     flows through hardware-specific timing channels.
                     However, existing work in this realm either restricts
                     to two-level security labels or essentially targets
                     two-input primitive gates and several simple multilevel
                     security lattices. This article provides a general way
                     to expand the GLIFT method for multilevel security.
                     Specifically, it formalizes tracking logic for an
                     arbitrary Boolean gate under finite security lattices,
                     presents a precise tracking logic generation method for
                     eliminating false positives in GLIFT logic created in a
                     constructive manner, and illustrates application
                     scenarios of GLIFT for enforcing multilevel information
                     flow security. Experimental results show various
                     trade-offs in precision and performance of GLIFT logic
                     created using different methods. It also reveals the
                     area and performance overheads that should be expected
                     when expanding GLIFT for multilevel security.},
  acknowledgement = ack-nhfb,
  articleno       = {2},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Wang:2014:CTS,
  author          = {Chun-Kai Wang and Yeh-Chi Chang and Hung-Ming Chen and
                     Ching-Yu Chin},
  title           = {Clock Tree Synthesis Considering Slew Effect on Supply
                     Voltage Variation},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {3:1--3:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2651401},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {This work tackles a problem of clock power
                     minimization within a skew constraint under supply
                     voltage variation. This problem is defined in the ISPD
                     2010 benchmark. Unlike mesh and cross link that reduce
                     clock skew uncertainty by multiple driving paths, our
                     focus is on controlling skew uncertainty in the
                     structure of the tree. We observe that slow slew
                     amplifies supply voltage variation, which induces
                     larger path delay variation and skew uncertainty. To
                     obtain the optimality, we formulate a symmetric clock
                     tree synthesis as a mathematical programming problem in
                     which the slew effect is considered by an NLDM-like
                     cell delay variation model. A symmetry-to-asymmetry
                     tree transformation is proposed to further reduce wire
                     loading. Experimental results show that the proposed
                     four methods save up to 20\% of clock tree capacitance
                     loading. Beyond controlling slew to suppress
                     supply-voltage-variation-induced skew, we also discuss
                     the strategies of clock tree synthesis under variant
                     variation scenarios and the limitations of the ISPD
                     2010 benchmark.},
  acknowledgement = ack-nhfb,
  articleno       = {3},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Liu:2014:SIS,
  author          = {Lingyi Liu and Shobha Vasudevan},
  title           = {Scaling Input Stimulus Generation through Hybrid
                     Static and Dynamic Analysis of {RTL}},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {4:1--4:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2676549},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {We enhance STAR, an automatic technique for functional
                     input vector generation for design validation. STAR
                     statically analyzes the source code of the
                     Register-Transfer Level (RTL) design. The STAR approach
                     is a hybrid between RTL symbolic execution and concrete
                     simulation that offsets the disadvantages of both. The
                     symbolic execution, which follows the concrete
                     simulation path, extracts constraints for that path.
                     The guard in the path constraints is then mutated and
                     passed to an SMT solver. A satisfiable assignment
                     generates a valid input vector. However, STAR suffers
                     the problem of path explosion during symbolic
                     execution. In this article, we present an explored
                     symbolic state caching method to attack path explosion.
                     Explored symbolic states are states starting from which
                     all subpaths have been explored. Each explored symbolic
                     state is stored in the form of bitmap encoding of
                     branches to ease comparison. When the explored symbolic
                     state is reached again in the following symbolic
                     execution, all subpaths can be pruned. In addition, we
                     use two types of optimizations: (a) dynamic UD chain
                     slicing; and (b) local conflict resolution to improve
                     the running efficiency of STAR. We demonstrate that the
                     results of the enhanced STAR are promising in showing
                     high coverage on benchmark RTL designs, and the runtime
                     of the test generation process is reduced from several
                     hours to less than 20 minutes.},
  acknowledgement = ack-nhfb,
  articleno       = {4},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Sinha:2014:DGP,
  author          = {Sharad Sinha and Thambipillai Srikanthan},
  title           = {Dataflow Graph Partitioning for Area-Efficient
                     High-Level Synthesis with Systems Perspective},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {5:1--5:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2660769},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Area efficiency in datapath synthesis is a widely
                     accepted goal of high-level synthesis. Applications
                     represented by their dataflow graphs are synthesized
                     using resource sharing principles to reduce the area.
                     However, existing resource sharing algorithms focus on
                     absolute area reduction and maximal resource sharing.
                     This kind of a design approach leads to constraints on
                     how often, in terms of number of clock cycles, a new
                     set of input data can be fed to an application. It also
                     leads to very large multiplexers in case of very big
                     dataflow graphs with hundreds of nodes. An adaptive
                     dataflow graph partitioning algorithm is proposed that
                     partitions a graph taking into account a user-defined
                     constraint on how often a new set of input data
                     (generally referred to as data initiation interval) is
                     available. At the same time, a resource sharing
                     algorithm is applied to such partitions in order to
                     reduce area. Multiple design points are generated for a
                     given dataflow graph with different area and time
                     measures to enable a designer to make decisions. We
                     demonstrate our graph partitioning algorithm using
                     synthetically generated large dataflow graphs and on
                     some benchmark applications.},
  acknowledgement = ack-nhfb,
  articleno       = {5},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Gange:2014:SOS,
  author          = {Graeme Gange and Harald S{\o}ndergaard and Peter J.
                     Stuckey},
  title           = {Synthesizing Optimal Switching Lattices},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {6:1--6:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2661632},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {The use of nanoscale technologies to create electronic
                     devices has revived interest in the use of regular
                     structures for defining complex logic functions. One
                     such structure is the switching lattice, a
                     two-dimensional lattice of four-terminal switches. We
                     show how to directly construct switching lattices of
                     polynomial size from arbitrary logic functions; we also
                     show how to synthesize minimal-sized lattices by
                     translating the problem to the satisfiability problem
                     for a restricted class of quantified Boolean formulas.
                     The synthesis method is an anytime algorithm that uses
                     modern SAT solving technology and dichotomic search. It
                     improves considerably on an earlier proposal for
                     creating switching lattices for arbitrary logic
                     functions.},
  acknowledgement = ack-nhfb,
  articleno       = {6},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Cheng:2014:ECD,
  author          = {An-Che Cheng and Chia-Chih (Jack) Yen and Celina G.
                     Val and Sam Bayless and Alan J. Hu and Iris Hui-Ru
                     Jiang and Jing-Yang Jou},
  title           = {Efficient Coverage-Driven Stimulus Generation Using
                     Simultaneous {SAT} Solving, with Application to
                     {SystemVerilog}},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {7:1--7:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2651400},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {SystemVerilog provides powerful language constructs
                     for verification, and one of them is the covergroup
                     functional coverage model. This model is designed as a
                     complement to assertion verification, that is, it has
                     the advantage of defining cross-coverage over multiple
                     coverage points. In this article, a coverage-driven
                     verification (CDV) approach is formulated as a
                     simultaneous Boolean satisfiability (SAT) problem that
                     is based on covergroups. The coverage bins defined by
                     the functional model are converted into Conjunction
                     Normal Form (CNF) and then solved together by our
                     proposed simultaneous SAT algorithm PLNSAT to generate
                     stimuli for improving coverage. The basic PLNSAT
                     algorithm is then extended in our second proposed
                     algorithm GPLNSAT, which exploits additional
                     information gleaned from the structure of SystemVerilog
                     covergroups. Compared to generating stimuli separately,
                     the simultaneous SAT approaches can share learned
                     knowledge across each coverage target, thus reducing
                     the overall solving time drastically. Experimental
                     results on a UART circuit and the largest ITC benchmark
                     circuits show that the proposed algorithms can achieve
                     10.8x speedup on average and outperform
                     state-of-the-art techniques in most of the
                     benchmarks.},
  acknowledgement = ack-nhfb,
  articleno       = {7},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Li:2014:SUM,
  author          = {Xueliang Li and Guihai Yan and Yinhe Han and Xiaowei
                     Li},
  title           = {{SmartCap}: Using Machine Learning for Power
                     Adaptation of {Smartphone}'s Application Processor},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {8:1--8:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2651402},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Power efficiency is increasingly critical to
                     battery-powered smartphones. Given that the using
                     experience is most valued by the user, we propose that
                     the power optimization should directly respect the user
                     experience. We conduct a statistical sample survey and
                     study the correlation among the user experience, system
                     runtime activities, and computational performance of an
                     application processor. We find that there exists a
                     minimal frequency requirement, called ``saturated
                     frequency''. Above this frequency, the device consumes
                     more power but provides little improvements in user
                     experience. This study motivates an intelligent
                     self-adaptive scheme, SmartCap, that automatically
                     identifies the most power-efficient state of the
                     application processor. Compared to prior Linux power
                     adaptation schemes, SmartCap can help save power from
                     11\% to 84\%, depending on applications, with little
                     decline in user experience.},
  acknowledgement = ack-nhfb,
  articleno       = {8},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Shih:2014:COR,
  author          = {Wen-Li Shih and Yi-Ping You and Chung-Wen Huang and
                     Jenq Kuen Lee},
  title           = {Compiler Optimization for Reducing Leakage Power in
                     Multithread {BSP} Programs},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {9:1--9:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2668119},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Multithread programming is widely adopted in novel
                     embedded system applications due to its high
                     performance and flexibility. This article addresses
                     compiler optimization for reducing the power
                     consumption of multithread programs. A traditional
                     compiler employs energy management techniques that
                     analyze component usage in control-flow graphs with a
                     focus on single-thread programs. In this environment
                     the leakage power can be controlled by inserting on and
                     off instructions based on component usage information
                     generated by flow equations. However, these methods
                     cannot be directly extended to a multithread
                     environment due to concurrent execution issues. This
                     article presents a multithread power-gating framework
                     composed of multithread power-gating analysis (MTPGA)
                     and predicated power-gating (PPG) energy management
                     mechanisms for reducing the leakage power when
                     executing multithread programs on simultaneous
                     multithreading (SMT) machines. Our multithread
                     programming model is based on hierarchical
                     bulk-synchronous parallel (BSP) models. Based on a
                     multithread component analysis with dataflow equations,
                     our MTPGA framework estimates the energy usage of
                     multithread programs and inserts PPG operations as
                     power controls for energy management. We performed
                     experiments by incorporating our power optimization
                     framework into SUIF compiler tools and by simulating
                     the energy consumption with a post-estimated SMT
                     simulator based on Wattch toolkits. The experimental
                     results show that the total energy consumption of a
                     system with PPG support and our power optimization
                     method is reduced by an average of 10.09\% for BSP
                     programs relative to a system without a power-gating
                     mechanism on leakage contribution set to 30\%; and the
                     total energy consumption is reduced by an average of
                     4.27\% on leakage contribution set to 10\%. The results
                     demonstrate our mechanisms are effective in reducing
                     the leakage energy of BSP multithread programs.},
  acknowledgement = ack-nhfb,
  articleno       = {9},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Maric:2014:HCD,
  author          = {Bojan Maric and Jaume Abella and Francisco J. Cazorla
                     and Mateo Valero},
  title           = {Hybrid Cache Designs for Reliable Hybrid High and
                     Ultra-Low Voltage Operation},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {10:1--10:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2658988},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Geometry scaling of semiconductor devices enables the
                     design of ultra-low-cost (e.g., below 1 USD)
                     battery-powered resource-constrained ubiquitous devices
                     for environment, urban life, and body monitoring. These
                     sensor-based devices require high performance to react
                     in front of infrequent particular events as well as
                     extreme energy efficiency in order to extend battery
                     lifetime during most of the time when low performance
                     is required. In addition, they require real-time
                     guarantees. The most suitable technological solution
                     for these devices consists of using hybrid processors
                     able to operate at: (i) high voltage to provide high
                     performance and (ii) near-/subthreshold voltage to
                     provide ultra-low energy consumption. However, the most
                     efficient SRAM memories for each voltage level differ
                     and trading off different SRAM designs is mandatory.
                     This is particularly true for cache memories, which
                     occupy most of the processor's area. In this article,
                     we propose new, simple, single-Vcc-domain hybrid L1
                     cache architectures suitable for reliable hybrid high
                     and ultra-low voltage operation. In particular, the
                     cache is designed by combining heterogeneous SRAM cell
                     types: some of the cache ways are optimized to satisfy
                     high-performance requirements during high voltage
                     operation, whereas the rest of the ways provide
                     ultra-low energy consumption and reliability during
                     near-/subthreshold voltage operation. We analyze the
                     performance, energy, and power impact of the proposed
                     cache designs when using them to implement L1 caches in
                     a processor. Experimental results show that our hybrid
                     caches can efficiently and reliably operate across a
                     wide range of voltages, consuming little energy at
                     near-/subthreshold voltage as well as providing high
                     performance at high voltage without decreasing
                     reliability levels to provide strong performance
                     guarantees, as required for our target market.},
  acknowledgement = ack-nhfb,
  articleno       = {10},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Baek:2014:DHD,
  author          = {Seungcheol Baek and Hyung Gyu Lee and Chrysostomos
                     Nicopoulos and Jongman Kim},
  title           = {Designing Hybrid {DRAM\slash PCM} Main Memory Systems
                     Utilizing Dual-Phase Compression},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {11:1--11:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2658989},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {The last few years have witnessed the emergence of a
                     promising new memory technology, namely Phase-Change
                     Memory (PCM). Due to its inherent ability to scale
                     deeply into the nanoscale regime and its low power
                     consumption, PCM is increasingly viewed as an
                     attractive alternative for the memory subsystem of
                     future microprocessor architectures. However, PCM is
                     marred by a duo of potentially show-stopping
                     deficiencies, that is, poor write performance
                     (especially when compared to the prevalent and
                     ubiquitous DRAM technology) and limited durability.
                     These weaknesses have urged designers to develop
                     various supporting architectural techniques to aid and
                     complement the operation of the PCM while mitigating
                     its innate flaws. One promising such solution is the
                     deployment of hybridized memory architectures that fuse
                     DRAM and PCM, in order to combine the best attributes
                     of each technology. In this article, we introduce a
                     novel Dual-Phase Compression (DPC) scheme and its
                     architectural design aimed at DRAM/PCM hybrids, which
                     caters to the limitations of PCM technology while
                     optimizing memory performance. The DPC technique is
                     specifically optimized for PCM-based environments and
                     is transparent to the operation of the remaining
                     components of the memory subsystem. Furthermore, the
                     proposed architecture is imbued with a multifaceted
                     wear-leveling technique to enhance the durability and
                     prolong the lifetime of the PCM. Extensive simulations
                     with traces from real applications running on a
                     full-system simulator demonstrate 20.4\% performance
                     improvement and 46.9\% energy reduction, on average, as
                     compared to a baseline DRAM/PCM hybrid implementation.
                     Additionally, the multifaceted wear-leveling technique
                     is shown to significantly prolong the lifetime of the
                     PCM.},
  acknowledgement = ack-nhfb,
  articleno       = {11},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Kuo:2014:RCS,
  author          = {Hsien-Kai Kuo and Bo-Cheng Charles Lai and Jing-Yang
                     Jou},
  title           = {Reducing Contention in Shared Last-Level Cache for
                     Throughput Processors},
  journal         = j-TODAES,
  volume          = {20},
  number          = {1},
  pages           = {12:1--12:??},
  month           = nov,
  year            = {2014},
  CODEN           = {ATASFO},
  DOI             = {https://doi.org/10.1145/2676550},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  bibdate         = {Wed Nov 19 11:18:40 MST 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Deploying the Shared Last-Level Cache (SLLC) is an
                     effective way to alleviate the memory bottleneck in
                     modern throughput processors, such as GPGPUs. A
                     commonly used scheduling policy of throughput
                     processors is to render the maximum possible
                     thread-level parallelism. However, this greedy policy
                     usually causes serious cache contention on the SLLC and
                     significantly degrades the system performance. It is
                     therefore a critical performance factor that the thread
                     scheduling of a throughput processor performs a careful
                     trade-off between the thread-level parallelism and
                     cache contention. This article characterizes and
                     analyzes the performance impact of cache contention in
                     the SLLC of throughput processors. Based on the
                     analyses and findings of cache contention and its
                     performance pitfalls, this article formally formulates
                     the aggregate working-set-size-constrained thread
                     scheduling problem that constrains the aggregate
                     working-set size on concurrent threads. With a proof to
                     be NP-hard, this article has integrated a series of
                     algorithms to minimize the cache contention and enhance
                     the overall system performance on GPGPUs. The
                     simulation results on NVIDIA's Fermi architecture have
                     shown that the proposed thread scheduling scheme
                     achieves up to 61.6\% execution time enhancement over a
                     widely used thread clustering scheme. When compared to
                     the state-of-the-art technique that exploits the data
                     reuse of applications, the improvement on execution
                     time can reach 47.4\%. Notably, the execution time
                     improvement of the proposed thread scheduling scheme is
                     only 2.6\% from an exhaustive searching scheme.},
  acknowledgement = ack-nhfb,
  articleno       = {12},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Sinha:2014:FAI,
author = "Roopak Sinha and Alain Girault and Gregor Goessler and
Partha S. Roop",
title = "A Formal Approach to Incremental Converter Synthesis
for System-on-Chip Design",
journal = j-TODAES,
volume = "20",
number = "1",
pages = "13:1--13:??",
month = nov,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2663344",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 19 11:18:40 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A system-on-chip (SoC) contains numerous intellectual
property blocks, or IPs. Protocol mismatches between
IPs may affect the system-level functionality of the
SoC. Mismatches are addressed by introducing converters
to control inter-IP interactions. Current approaches
towards converter generation find limited practical
application as they use restrictive models, lack formal
rigour, handle a small subset of commonly encountered
mismatches, and/or are not scalable. We propose a
formal technique for SoC design using incremental
converter synthesis. The proposed formulation provides
precise models for protocols and requirements, and
provides a scalable algorithm that allows adding
multiple components and requirements to an SoC
incrementally. We prove that the technique is sound and
complete. Experimental results obtained using real-life
AMBA benchmarks show the scalability and wide range of
mismatches handled by our approach.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Aksoy:2014:MDF,
author = "Levent Aksoy and Paulo Flores and Jose Monteiro",
title = "Multiplierless Design of Folded {DSP} Blocks",
journal = j-TODAES,
volume = "20",
number = "1",
pages = "14:1--14:??",
month = nov,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2663343",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 19 11:18:40 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article addresses the problem of minimizing the
implementation cost of the time-multiplexed constant
multiplication (TMCM) operation that realizes the
multiplication of an input variable by a single
constant selected from a set of multiple constants at a
time. It presents an efficient algorithm, called
orpheus, that finds a multiplierless TMCM design by
sharing logic operators, namely adders, subtractors,
adders/subtractors, and multiplexors (MUXes). Moreover,
this article introduces folded design architectures for
the digital signal processing (DSP) blocks, such as
finite impulse response (FIR) filters and linear DSP
transforms, and describes how these folded DSP blocks
can be efficiently realized using TMCM operations
optimized by orpheus. Experimental results indicate
that orpheus can find better solutions than existing
TMCM algorithms, yielding TMCM designs requiring less
area. They also show that the folded architectures lead
to alternative designs with significantly less area,
but incurring an increase in latency and energy
consumption, compared to the parallel architecture.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{BasiriM:2014:EHB,
author = "Mohamed Asan {Basiri M.} and Noor Mahammad Sk",
title = "An Efficient Hardware-Based Higher Radix Floating
Point {MAC} Design",
journal = j-TODAES,
volume = "20",
number = "1",
pages = "15:1--15:??",
month = nov,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2667224",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 19 11:18:40 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article proposes an effective way of implementing
a multiply accumulate circuit (MAC) for high-speed
floating point arithmetic operations. The real-world
applications related to digital signal processing and
the like demand high-performance computation with
greater accuracy. In general, digital signals are
represented as a sequence of signed/unsigned
fixed/floating point numbers. The final result of a MAC
operation can be computed by feeding the mantissa of
the previous MAC result as one of the partial products
to a Wallace tree multiplier or Braun multiplier. Thus,
the separate accumulation circuit can be avoided by
keeping the circuit depth still within the bounds of
the Wallace tree multiplier, namely $ O (\log_2 n) $,
or Braun multiplier, namely $ O (n) $. In this article,
three kinds of floating point MACs are proposed. The
experimental results show 48.54\% of improvement in
worst path delay achieved by the proposed floating
point MAC using a radix-2 Wallace structure compared
with a conventional floating point MAC without a
pipeline using a 45nm technology library. The same
proposed design gives 39.92\% of improvement in worst
path delay without a pipeline using a radix-4 Braun
structure as compared with a conventional design. In
this article, a radix-32 $ Q_{32.32}$-format-based
floating point MAC is proposed using a Wallace
tree/Braun multiplier. Also this article discusses the
msb prediction problem and its solution in floating
point arithmetic that is not available in modern fused
multiply-add designs. The performance results show
comparisons between the proposed floating point MAC
with various floating point MAC designs for radix-2,
-4, -8, and -16. The proposed design has lesser depth
than a conventional floating point MAC as well as a
lower area requirement than other ways of floating
point MAC implementation, both with/without a
pipeline.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bolchini:2014:DHE,
author = "Cristiana Bolchini and Chiara Sandionigi",
title = "Design of Hardened Embedded Systems on Multi-{FPGA}
Platforms",
journal = j-TODAES,
volume = "20",
number = "1",
pages = "16:1--16:??",
month = nov,
year = "2014",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2676551",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Nov 19 11:18:40 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The aim of this article is the definition of a
reliability-aware methodology for the design of
embedded systems on multi-FPGA platforms. The designed
system must be able to detect the occurrence of faults
globally and autonomously, in order to recover or to
mitigate their effects. Two categories of faults are
identified, based on their impact on the device
elements, namely (i) recoverable faults, transient problems
that can be fixed without causing a lasting effect,
and (ii) nonrecoverable faults, those that cause
a permanent problem, making the portion of the fabric
unusable. While some aspects can be taken from previous
solutions available in literature, several open issues
exist. In fact, no complete design methodology handling
all the peculiar issues of the considered scenario has
been proposed yet, a gap we aim at filling with our
work. The final system exposes reliability properties
and increases its overall lifetime and availability.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lu:2015:EEB,
author = "Jingwei Lu and Pengwen Chen and Chin-Chih Chang and Lu
Sha and Dennis Jen-Hsin Huang and Chin-Chi Teng and
Chung-Kuan Cheng",
title = "{ePlace}: Electrostatics-Based Placement Using {Fast
Fourier Transform} and {Nesterov}'s Method",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "17:1--17:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699873",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We develop a flat, analytic, and nonlinear placement
algorithm, ePlace, which is more effective,
generalized, simpler, and faster than previous works.
Based on the analogy between placement instance and
electrostatic system, we develop a novel placement
density function eDensity, which models every object as
positive charge and the density cost as the potential
energy of the electrostatic system. The electric
potential and field distribution are coupled with
density using a well-defined Poisson's equation, which
is numerically solved by spectral methods based on fast
Fourier transform (FFT). Instead of using the conjugate
gradient (CG) nonlinear solver in previous placers, we
propose to use Nesterov's method which achieves faster
convergence. The efficiency bottleneck on line search
is resolved by predicting the steplength using a
closed-form equation of Lipschitz constant. The
placement performance is validated through experiments
on the ISPD 2005 and ISPD 2006 benchmark suites, where
ePlace outperforms all state-of-the-art placers
(Capo10.5, FastPlace3.0, RQL, MAPLE, ComPLx, BonnPlace,
POLAR, APlace3, NTUPlace3, mPL6) with much shorter
wirelength and shorter or comparable runtime. On
average, of all the ISPD 2005 benchmarks, ePlace
outperforms the leading placer BonnPlace with 2.83\%
shorter wirelength and runs 3.05$ \times $ faster; and
on average, of all the ISPD 2006 benchmarks, ePlace
outperforms the leading placer MAPLE with 4.59\%
shorter wirelength and runs 2.84$ \times $ faster.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Guo:2015:RDS,
author = "Qi Guo and Tianshi Chen and Zhi-Hua Zhou and Olivier
Temam and Ling Li and Depei Qian and Yunji Chen",
title = "Robust Design Space Modeling",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "18:1--18:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2668118",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Architectural design spaces of microprocessors are
often exponentially large with respect to the pending
processor parameters. To avoid simulating all
configurations in the design space, machine learning
and statistical techniques have been utilized to build
regression models for characterizing the relationship
between architectural configurations and responses
(e.g., performance or power consumption). However, this
article shows that the accuracy variability of many
learning techniques over different design spaces and
benchmarks can be significant enough to mislead the
decision-making. This clearly indicates a high risk of
applying techniques that work well on previous modeling
tasks (each involving a design space, benchmark, and
design objective) to a new task, due to which the
powerful tools might be impractical. Inspired by
ensemble learning in the machine learning domain, we
propose a robust framework called ELSE to reduce the
accuracy variability of design space modeling. Rather
than employing a single learning technique as in
previous investigations, ELSE employs distinct learning
techniques to build multiple base regression models for
each modeling task. This is not a trivial combination
of different techniques (e.g., always trusting the
regression model with the smallest error). Instead,
ELSE carefully maintains the diversity of base
regression models and constructs a metamodel from the
base models that can provide accurate predictions even
when the base models are far from accurate.
Consequently, we are able to reduce the number of cases
in which the final prediction errors are unacceptably
large. Experimental results validate the robustness of
ELSE: compared with the widely used artificial neural
network over 52 distinct modeling tasks, ELSE reduces
the accuracy variability by about 62\%. Moreover, ELSE
reduces the average prediction error by 27\% and 85\%
for the investigated MIPS and POWER design spaces,
respectively.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Taouil:2015:YIW,
author = "Mottaqiallah Taouil and Said Hamdioui and Erik Jan
Marinissen",
title = "Yield Improvement for {$3$D} Wafer-to-Wafer Stacked
{ICs} Using Wafer Matching",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "19:1--19:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699832",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Three-Dimensional Stacked IC (3D-SIC) using
Through-Silicon Vias (TSVs) is an emerging technology
that provides heterogeneous integration, higher
performance, and lower power consumption compared to
traditional ICs. Stacking 3D-SICs using Wafer-to-Wafer
(W2W) has several advantages such as high stacking
throughput, high TSV density, and the ability to handle
thin wafers and small dies. However, it suffers from
low-compound yield as the stacking of good dies on bad
dies and vice versa cannot be prevented. This article
investigates wafer matching as a means for yield
improvement. It first defines a complete wafer matching
framework consisting of different scenarios, each a
combination of a matching process (defines the order of
wafer selection), a matching criterion (defines whether
good or bad dies are matched), wafer rotation (defines
whether wafers are rotated or not), and a repository
type. The repository type specifies whether the
repository is filled immediately after each wafer
selection (i.e., running repository) or after all
wafers are matched (i.e., static repository). A mapping
of prior work on the framework shows that existing
research has mainly explored scenarios based on static
repositories. Therefore, the article analyzes scenarios
based on running repositories. Simulation results show
that scenarios based on running repositories improve
the compound yield with up to 13.4\% relative to random
W2W stacking; the improvement strongly depends on the
number of stacked dies, die yield, repository size, as
well as on the used matching process. Moreover, the
results reveal that scenarios based on running
repositories outperform those of static repositories in
terms of yield improvement at significant runtime
reduction (three orders of magnitude) and lower memory
complexity (from exponential to linear in terms of
stack size).",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2015:CDC,
author = "Naiwen Chang and Eddie Cheng and Sunyuan Hsieh",
title = "Conditional Diagnosability of {Cayley} Graphs
Generated by Transposition Trees under the {PMC}
Model",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "20:1--20:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699854",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Processor fault diagnosis has played an essential role
in measuring the reliability of a multiprocessor
system. The diagnosability of many well-known
multiprocessor systems has been widely investigated.
Conditional diagnosability is a novel measure of
diagnosability by adding a further condition that any
fault set cannot contain all the neighbors of every
node in the system. Several known structural properties
of Cayley graphs are exhibited. Based on these
properties, we investigate the conditional
diagnosability of Cayley graphs generated by
transposition trees under the PMC model and show that
it is $ 4 n - 11 $ for $ n \geq 4 $ except for the
$n$-dimensional star graph for which it has been shown to
be $ 8 n - 21 $ for $ n \geq 5 $ (refer to Chang and Hsieh
[2014]).",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Duan:2015:DDO,
author = "Qing Duan and Jun Zeng and Krishnendu Chakrabarty and
Gary Dispoto",
title = "Data-Driven Optimization of Order Admission Policies
in a Digital Print Factory",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "21:1--21:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699836",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "On-demand digital print service is an example of a
real-time embedded enterprise system. It offers mass
customization and exemplifies personalized
manufacturing services. Once a print order is submitted
to the print factory by a client, the print service
provider (PSP) needs to make a real-time decision on
whether to accept or refuse this order. Based on the
print factory's current capacity and the order's
properties and requirements, an order is refused if its
acceptance is not profitable for the PSP. The order is
accepted with the most appropriate due date in order to
maximize the profit that can result from this order. We
have developed an automated learning-based order
admission framework that can be embedded into an
enterprise environment to provide real-time admission
decisions for new orders. The framework consists of
three classifiers: Support Vector Machine (SVM),
Decision Tree (DT), and Bayesian Probabilistic Model
(BPM). The classifiers are trained by history orders
and used to predict completion status for new orders. A
decision integration technique is implemented to
combine the results of the classifiers and predict due
dates. Experimental results derived using real factory
data from a leading print service provider and Weka
open-source software show that the order completion
status prediction accuracy is significantly improved by
the decision integration strategy. The proposed
multiclassifier model also outperforms a standalone
regression model.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2015:DES,
author = "Cheng-Yen Lin and Chung-Wen Huang and Chi-Bang Kuan
and Shi-Yu Huang and Jenq-Kuen Lee",
title = "The Design and Experiments of a {SID}-Based
Power-Aware Simulator for Embedded Multicore Systems",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "22:1--22:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699834",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Embedded multicore systems are playing increasingly
important roles in the design of consumer electronics.
The objective of such systems is to optimize both
performance and power characteristics of mobile
devices. However, currently there are no power metrics
supporting popular application design platforms (such
as SID) that application developers use to develop
their applications. This hinders the ability of
application developers to optimize power consumption.
In this article we present the design and experiments
of a SID-based power-aware simulation framework for
embedded multicore systems. The proposed power
estimation flow includes two phases: IP-level power
modeling and power-aware system simulation. The first
phase employs PowerMixer$^{IP}$ to construct the power
model for the processor IP and other major IPs, while
the second phase involves a power abstract
interpretation method for summarizing the simulation
trace, then, with a CPE module, estimating the power
consumption based on the summarized trace information
and the input of IP power models. In addition, a
Manager component is devised to map each digital signal
processor (DSP) component to a host thread and maintain
the access to shared resources. The aim is to maintain
the simulation performance as the number of simulated
DSP components increases. A power-profiling API is also
supported that developers of embedded software can use
to tune the granularity of power-profiling for a
specific code section of the target application. We
demonstrate via case studies and experiments how
application developers can use our SID-based power
simulator for optimizing the power consumption of their
applications. We characterize the power consumption of
DSP applications with the DSPstone benchmark and
discuss how compiler optimization levels with SIMD
intrinsics influence the performance and power
consumption. A histogram application and an
augmented-reality application based on human-face-based
RMS (recognition, mining, and synthesis) application
are deployed as running examples on multicore systems
to demonstrate how our power simulator can be used by
developers in the optimization process to illustrate
different views of power dissipations of
applications.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Asadinia:2015:PLP,
author = "Marjan Asadinia and Mohammad Arjomand and Hamid
Sarbazi Azad",
title = "Prolonging Lifetime of {PCM}-Based Main Memories
through On-Demand Page Pairing",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "23:1--23:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699867",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With current memory scalability challenges,
Phase-Change Memory (PCM) is viewed as an attractive
replacement to DRAM. The preliminary concern for PCM
applicability is its limited write endurance that
results in fast wear-out of memory cells. Worse,
process variation in the deep-nanometer regime
increases the variation in cell lifetime, resulting in
an early and sudden reduction in main memory capacity
due to the wear-out of a few cells. Recent studies have
proposed redirection or correction schemes to alleviate
this problem, but all suffer poor throughput or
latency. In this article, we show that one of the
inefficiency sources in current schemes, even when
wear-leveling algorithms are used, is the nonuniform
write endurance limit incurred by process variation,
that is, when some memory pages have reached their
endurance limit, other pages may be far from their
limit. In this line, we present a technique that aims
to displace a faulty page to a healthy page. This
technique, called On-Demand Page Paired PCM (OD3P, for
short), when applied at page level, can improve PCM
time-to-failure by 20\% on average for different
multithreaded and multiprogrammed workloads while also
improving IPC by 14\% on average compared to previous
page-level techniques. The comparison between
line-level OD3P and previous line-level techniques
reveals about 2$ \times $ improvement of lifetime and
performance.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2015:OAA,
author = "Xing Huang and Genggeng Liu and Wenzhong Guo and
Yuzhen Niu and Guolong Chen",
title = "Obstacle-Avoiding Algorithm in {X}-Architecture Based
on Discrete Particle Swarm Optimization for {VLSI}
Design",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "24:1--24:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699862",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Obstacle-avoiding Steiner minimal tree (OASMT)
construction has become a focus problem in the physical
design of modern very large-scale integration (VLSI)
chips. In this article, an effective algorithm is
presented to construct an OASMT based on
X-architecture for a given set of pins and obstacles.
First, a kind of special particle swarm optimization
(PSO) algorithm is proposed that successfully combines
the classic genetic algorithm (GA), and greatly
improves its own search capability. Second, a
pretreatment strategy is put forward to deal with
obstacles and pins, which can provide a fast
information inquiry for the whole algorithm by
generating a precomputed lookup table. Third, we
present an efficient adjustment method, which enables
particles to avoid all the obstacles by introducing
some corner points of obstacles. Finally, an excellent
refinement method is discussed to further enhance the
quality of the final routing tree, which can improve
the quality of the solution by 7.93\% on average. To
our best knowledge, this is the first time to specially
solve the single-layer obstacle-avoiding problem in
X-architecture. Experimental results show that the
proposed algorithm can further shorten wirelength in
the presence of obstacles. And it achieves the best
solution quality in a reasonable runtime among the
existing algorithms.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2015:MBW,
author = "Hung-Sheng Chang and Yuan-Hao Chang and Pi-Cheng Hsiu
and Tei-Wei Kuo and Hsiang-Pang Li",
title = "Marching-Based Wear-Leveling for {PCM}-Based Storage
Systems",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "25:1--25:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699831",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Improving the performance of storage systems without
losing the reliability and sanity/integrity of file
systems is a major issue in storage system designs. In
contrast to existing storage architectures, we consider
a PCM-based storage architecture to enhance the
reliability of storage systems. In PCM-based storage
systems, the major challenge falls on how to prevent
the frequently updated (meta)data from wearing out
their residing PCM cells without excessively searching
and moving metadata around the PCM space and without
extensively updating the index structures of file
systems. In this work, we propose an adaptive
wear-leveling mechanism to prevent any PCM cell from
being worn out prematurely by selecting appropriate
data for swapping with constant search/sort cost.
Meanwhile, the concept of indirect pointers is designed
in the proposed mechanism to swap data without any
modification to the file system's indexes. Experiments
were conducted based on well-known benchmarks and
realistic workloads to evaluate the effectiveness of
the proposed design, for which the results are
encouraging.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2015:APB,
author = "Gang Chen and Kai Huang and Christian Buckl and Alois
Knoll",
title = "Applying Pay-Burst-Only-Once Principle for Periodic
Power Management in Hard Real-Time Pipelined
Multiprocessor Systems",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "26:1--26:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699865",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Pipelined computing is a promising paradigm for
embedded system design. Designing a power management
policy to reduce the power consumption of a pipelined
system with nondeterministic workload is, however,
nontrivial. In this article, we study the problem of
energy minimization for coarse-grained pipelined
systems under hard real-time constraints and propose
new approaches based on an inverse use of the
pay-burst-only-once principle. We formulate the problem
by means of the resource demands of individual pipeline
stages and propose two new approaches, a quadratic
programming-based approach and fast heuristic, to solve
the problem. In the quadratic programming approach, the
problem is transformed into a standard quadratic
programming with box constraint and then solved by a
standard quadratic programming solver. Observing the
problem is NP-hard, the fast heuristic is designed to
solve the problem more efficiently. Our approach is
scalable with respect to the numbers of pipeline
stages. Simulation results using real-life applications
are presented to demonstrate the effectiveness of our
methods.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yonga:2015:ABE,
author = "Franck Yonga and Michael Mefenza and Christophe
Bobda",
title = "{ASP}-Based Encoding Model of Architecture Synthesis
for Smart Cameras in Distributed Networks",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "27:1--27:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2701419",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A synthesis approach based on Answer Set Programming
(ASP) for heterogeneous system-on-chips to be used in
distributed camera networks is presented. In such
networks, the tight resource limitations represent a
major challenge for application development. Starting
with a high-level description of applications, the
physical constraints of the target devices, and the
specification of network configuration, our goal is to
produce optimal computing infrastructures made of a
combination of hardware and software components for
each node of the network. Optimization aims at
maximizing speed while minimizing chip area and power
consumption. Additionally, by performing the
architecture synthesis simultaneously for all cameras
in the network, we are able to minimize the overall
utilization of communication resources and consequently
reduce power consumption. Because of its
reconfiguration capabilities, a Field Programmable Gate
Array (FPGA) has been chosen as the target device,
which enhances the exploration of several design
alternatives. We present several realistic network
scenarios to evaluate and validate the proposed
synthesis approach.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2015:AIP,
author = "Lok-Won Kim and Dong-U Lee and John Villasenor",
title = "Automated Iterative Pipelining for {ASIC} Design",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "28:1--28:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2660768",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We describe an automated pipelining approach for
optimally balanced pipeline implementation that
achieves low area cost as well as meeting timing
requirements. Most previous automatic pipelining
methods have focused on Instruction Set Architecture
(ISA)-based designs and the main goal of such methods
generally has been maximizing performance as measured
in terms of instructions per clock (IPC). By contrast,
we focus on datapath-oriented designs (e.g., DSP
filters for image or communication processing
applications) in ASIC design flows. The goal of the
proposed pipelining approach is to find the optimally
pipelined design that not only meets the user-specified
target clock frequency, but also seeks to minimize area
cost of a given design. Unlike most previous
approaches, the proposed methods incorporate the use of
accurate area and timing information (iteratively
achieved by synthesizing every interim pipelined
design) to achieve higher accuracy during design
exploration. When compared with exhaustive design
exploration that considers all possible pipeline
patterns, the two heuristic pipelining methods
presented here involve only a small area penalty
(typically under 5\%) while offering dramatically
reduced computational complexity. Experimental
validation is performed with commercial ASIC design
tools and described for applications including
polynomial function evaluation, FIR filters, matrix
multiplication, and discrete wavelet transform filter
designs with a 90 nm standard cell library.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2015:GDU,
author = "Irith Pomeranz",
title = "A Generalized Definition of Unnecessary Test Vectors
in Functional Test Sequences",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "29:1--29:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699853",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A class of static test compaction procedures for
functional test sequences is based on the omission of
unnecessary test vectors. According to the definition
used by these procedures, a test vector is unnecessary
if all the target faults continue to be detected after
it is omitted. This article introduces a more general
definition of unnecessary test vectors that allows
additional ones to be omitted. According to this
definition, a test vector is unnecessary if every
target fault can be detected by a sequence that is
obtained after omitting the vector, and possibly other
vectors. The article develops a procedure for omitting
test vectors based on this definition and discusses its
effects on the storage requirements and test
application time.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Baranowski:2015:RSN,
author = "Rafal Baranowski and Michael A. Kochte and
Hans-Joachim Wunderlich",
title = "Reconfigurable Scan Networks: Modeling, Verification,
and Optimal Pattern Generation",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "30:1--30:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699863",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Efficient access to on-chip instrumentation is a key
requirement for post-silicon validation, test, debug,
bringup, and diagnosis. Reconfigurable scan networks,
as proposed by, for example, IEEE Std 1687-2014 and
IEEE Std 1149.1-2013, emerge as an effective and
affordable means to cope with the increasing complexity
of on-chip infrastructure. Reconfigurable scan networks
are often hierarchical and may have complex structural
and functional dependencies. Common approaches for scan
verification based on static structural analysis and
functional simulation are not sufficient to ensure
correct operation of these types of architectures. To
access an instrument in a reconfigurable scan network,
a scan-in bit sequence must be generated according to
the current state and structure of the network. Due to
sequential and combinational dependencies, the access
pattern generation process (pattern retargeting)
poses a complex decision and optimization problem. This
article presents the first generalized formal model
that considers structural and functional dependencies
of reconfigurable scan networks and is directly
applicable to 1687-2014-based and 1149.1-2013-based
scan architectures. This model enables efficient formal
verification of complex scan networks, as well as
automatic generation of access patterns. The proposed
pattern generation method supports concurrent access to
multiple target scan registers (access merging) and
generates short scan-in sequences.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Beznia:2015:TAR,
author = "Kamel Beznia and Ahcene Bounceur and Reinhardt Euler
and Salvador Mir",
title = "A Tool for Analog\slash {RF BIST} Evaluation Using
Statistical Models of Circuit Parameters",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "31:1--31:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699837",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Testing analog integrated circuits is expensive in
terms of both test equipment and time. To reduce the
cost, Design-For-Test techniques (DFT) such as Built-In
Self-Test (BIST) have been developed. For a given
Circuit Under Test (CUT), the choice of a suitable
technique should be made at the design stage as a
result of the analysis of test metrics such as test
escapes and yield loss. However, it is very hard to
carry out this estimation for analog/RF circuits by
using fault simulation techniques. Instead, the
estimation of parametric test metrics is made possible
by Monte Carlo circuit-level simulations and the
construction of statistical models. These models
represent the output parameter space of the CUT in
which the test metrics are defined. In addition, models
of the input parameter space may be required to
accelerate the simulations and obtain higher confidence
in the DFT choices. In this work, we describe a
methodological flow for the selection of most adequate
statistical models and several techniques that can be
used for obtaining these models. Some of these
techniques have been integrated into a Computer-Aided
Test (CAT) tool for the automation of the process of
test metrics estimation. This estimation is illustrated
for the case of a BIST solution for CMOS imager pixels
that requires the use of advanced statistical modeling
techniques.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gupte:2015:FAT,
author = "Adwait Gupte and Sudhanshu Vyas and Phillip H. Jones",
title = "A Fault-Aware Toolchain Approach for {FPGA} Fault
Tolerance",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "32:1--32:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699838",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As the size and density of silicon chips continue to
increase, maintaining acceptable manufacturing yields
has become increasingly difficult. Recent works suggest
that lithography techniques are reaching their limits
with respect to enabling high yield fabrication of
small-scale devices, thus there is an increasing need
for techniques that can tolerate fabrication time
defects. One candidate technology to help combat these
defects is reconfigurable hardware. The flexible nature
of reconfigurable devices, such as Field Programmable
Gate Arrays (FPGAs), makes it possible for them to
route around defective areas of a chip after the device
has been packaged and deployed into the field. This
work presents a technique that aims to increase the
effective yield of FPGA manufacturing by re-claiming a
portion of chips that would be ordinarily classified as
unusable. In brief, we propose a modification to
existing commercial toolchain flows to make them fault
aware. A phase is added to identify faults within the
chip. The locations of these faults are then used by
the toolchain to avoid faults during the placement and
routing phase. Specifically, we have applied our
approach to the Xilinx commercial toolchain flow and
evaluated its tolerance to both logic and routing
resource faults. Our findings show that, at a cost of
5--10\% in device frequency performance, the modified
toolchain flow can tolerate up to 30\% of logic
resources being faulty and, depending on the nature of
the target application, can tolerate 1--30\% of the
device's routing resources being faulty. These results
provide strong evidence that commercial toolchains not
designed for the purpose of tolerating faults can still
be greatly leveraged in the presence of faults to place
and route circuits in an efficient manner.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhang:2015:RBA,
author = "Jiliang Zhang and Yaping Lin and Gang Qu",
title = "Reconfigurable Binding against {FPGA} Replay Attacks",
journal = j-TODAES,
volume = "20",
number = "2",
pages = "33:1--33:??",
month = feb,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699833",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 3 14:46:37 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The FPGA replay attack, where an attacker downgrades
an FPGA-based system to the previous version with known
vulnerabilities, has become a serious security and
privacy concern for FPGA design. Current FPGA
intellectual property (IP) protection mechanisms target
the protection of FPGA configuration bitstreams by
watermarking or encryption or binding. However, these
mechanisms fail to prevent replay attacks. In this
article, based on a recently reported PUF-FSM binding
method that protects the usage of configuration
bitstreams, we propose to reconfigure both the physical
unclonable functions (PUFs) and the locking scheme of
the finite state machine (FSM) in order to defeat the
replay attack. We analyze the proposed scheme and
demonstrate how replay attack would fail in attacking
systems protected by the reconfigurable binding method.
We implement two ways to build reconfigurable PUFs and
propose two practical methods to reconfigure the
locking scheme. Experimental results show that the two
reconfigurable PUFs can generate significantly distinct
responses with average reconfigurability of more than
40\%. The reconfigurable locking schemes only incur a
timing overhead less than 1\%.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Srivastav:2015:DUL,
author = "Meeta Srivastav and Mohammed Ehteshamuddin and Kyle
Stegner and Leyla Nazhandali",
title = "Design of Ultra-Low Power Scalable-Throughput
Many-Core {DSP} Applications",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "34:1--34:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2720018",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We propose a system-level solution in designing
process variation aware (PVA) scalable-throughput
many-core systems for energy constrained applications.
In our proposed methodology, we leverage the benefits
of voltage scaling for obtaining energy efficiency
while compensating for the loss in throughput by
exploiting parallelism present in various DSP designs.
We demonstrate that such a hybrid method consumes
6.27\%--28.15\% less power as compared to simple
dynamic voltage scaling over different workload
environments. Design details of a prototype chip
fabricated on 90 nm technology node and its findings
are presented. Chip consists of 8 homogeneous FIR
cores, which are capable of running from near-threshold
to nominal voltages. In our 20 chip population, we
observe 7\% variation in speed among the cores at
nominal voltage (0.9V) and 26\% at near threshold
voltage (0.55V). We also observe 54\% variation in
power consumption of the cores. For any desired
throughput, the optimum number of cores and their
optimum operating voltage is chosen based on the speed
and power characteristics of the cores present inside
the chip. We will also present analysis on
energy-efficiency of such systems based on changes in
ambient temperature.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jafari:2015:LUD,
author = "Fahimeh Jafari and Zhonghai Lu and Axel Jantsch",
title = "Least Upper Delay Bound for {VBR} Flows in
Networks-on-Chip with Virtual Channels",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "35:1--35:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2733374",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Real-time applications such as multimedia and gaming
require stringent performance guarantees, usually
enforced by a tight upper bound on the maximum
end-to-end delay. For FIFO multiplexed on-chip packet
switched networks we consider worst-case delay bounds
for Variable Bit-Rate (VBR) flows with aggregate
scheduling, which schedules multiple flows as an
aggregate flow. VBR Flows are characterized by a
maximum transfer size ($L$), peak rate ($p$),
burstiness ($ \sigma $), and average sustainable rate
($ \rho $). Based on network calculus, we present and
prove theorems to derive per-flow end-to-end Equivalent
Service Curves (ESC), which are in turn used for
computing Least Upper Delay Bounds (LUDBs) of
individual flows. In a realistic case study we find
that the end-to-end delay bound is up to 46.9\% more
accurate than the case without considering the traffic
peak behavior. Likewise, results also show similar
improvements for synthetic traffic patterns. The
proposed methodology is implemented in C++ and has low
run-time complexity, enabling quick evaluation for
large and complex SoCs.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bombieri:2015:MRR,
author = "Nicola Bombieri and Franco Fummi and Sara Vinco",
title = "A Methodology to Recover {RTL IP} Functionality for
Automatic Generation of {SW} Applications",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "36:1--36:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2720019",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the advent of heterogeneous multiprocessor
system-on-chips (MPSoCs), hardware/software
partitioning is again on the rise both in research and
in product development. In this new scenario,
implementing intellectual-property (IP) blocks as SW
applications rather than dedicated HW is an increasing
trend to fully exploit the computation power provided
by the MPSoC CPUs. On the other hand, whole libraries
of IP blocks are available as RTL descriptions, most of
them without a corresponding high-level SW
implementation. In this context, this article presents
a methodology to automatically generate SW applications
in C++, by starting from existing RTL IPs implemented
in hardware description language (HDL). The methodology
exploits an abstraction algorithm to eliminate
implementation details typical of HW descriptions (such
as cycle-accurate functionality and data types) to
guarantee relevant performance of the generated code.
The experimental results show that, in many cases, the
C++ code automatically generated in a few seconds with
the proposed methodology is as efficient as the
corresponding code manually implemented from scratch.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Holst:2015:HTL,
author = "Stefan Holst and Michael E. Imhof and Hans-Joachim
Wunderlich",
title = "High-Throughput Logic Timing Simulation on {GPGPUs}",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "37:1--37:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2714564",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Many EDA tasks such as test set characterization or
the precise estimation of power consumption, power
droop and temperature development, require a very large
number of time-aware gate-level logic simulations.
Until now, such characterizations have been feasible
only for rather small designs or with reduced precision
due to the high computational demands. The new
simulation system presented here is able to accelerate
such tasks by more than two orders of magnitude and
provides for the first time fast and comprehensive
timing simulations for industrial-sized designs.
Hazards, pulse-filtering, and pin-to-pin delay are
supported for the first time in a GPGPU accelerated
simulator, and the system can easily be extended to
even more realistic delay models and further
applications. A sophisticated mapping with efficient
memory utilization and access patterns as well as
minimal synchronizations and control flow divergence is
able to use the full potential of GPGPU architectures.
To provide such a mapping, we combine for the first
time the versatility of event-based timing simulation
and multi-dimensional parallelism used in GPU-based
gate-level simulators. The result is a
throughput-optimized timing simulation algorithm, which
runs many simulation instances in parallel and at the
same time fully exploits gate-parallelism within the
circuit.",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xu:2015:DCD,
author = "Tong Xu and Peng Li and Savithri Sundareswaran",
title = "Decoupling Capacitance Design Strategies for Power
Delivery Networks with Power Gating",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "38:1--38:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2700825",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power gating is a widely used leakage power saving
strategy in modern chip designs. However, power gating
introduces unique power integrity issues and trade-offs
between switching and rush current (wake-up) supply
noises. At the same time, the amount of power saving
intrinsically trades off with power integrity. In
addition, these trade-offs significantly vary with
supply voltage. In this article, we propose systemic
decoupling capacitors (decaps) optimization strategies
that optimally trade-off between power integrity and
leakage saving. Specially, new global decap and
reroutable decap design concepts are proposed to relax
the tight interaction between power integrity and
leakage saving of power gated PDNs with a single supply
voltage level. Furthermore, we propose a flexible decap
allocation technique to deal with the design trade-offs
under multiple supply voltage levels. The proposed
strategies are implemented in an automatic design flow
for choosing the optimal amount of local decaps, global
decaps and reroutable decaps. The conducted experiments
demonstrate that leakage saving can be increased
significantly compared with the conventional PDN design
approach with a single supply voltage level using the
proposed techniques without jeopardizing power
integrity. For PDN designs operating at two supply
voltage levels, the optimal performance is achieved at
each voltage level.",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Firouzi:2015:AVA,
author = "Farshad Firouzi and Fangming Ye and Krishnendu
Chakrabarty and Mehdi B. Tahoori",
title = "Aging- and Variation-Aware Delay Monitoring Using
Representative Critical Path Selection",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "39:1--39:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2746237",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Process together with runtime variations in
temperature and voltage, as well as transistor aging,
degrade path delay and may eventually induce circuit
failure due to timing variations. Therefore, in-field
tracking of path delays is essential, and to respond to
this need, several delay sensor designs have been
proposed in the literature. However, due to the
significant overhead of these sensors and the large
number of critical paths in today's IC, it is
infeasible to monitor the delay of every critical path
in silicon. We present an aging- and variation-aware
representative path selection technique based on
machine learning that allows to measure the delay of a
small set of paths and infer the delay of a larger pool
of paths that are likely to fail due to delay
variations. Simulation results for benchmark circuits
highlight the accuracy of the proposed approach for
predicting critical-path delay based on the selected
representative paths.",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Park:2015:SGA,
author = "Heejong Park and Avinash Malik and Zoran Salcic",
title = "Scheduling Globally Asynchronous Locally Synchronous
Programs for Guaranteed Response Times",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "40:1--40:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2740961",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Safety-critical software systems need to guarantee
functional correctness and bounded response times to
external input events. Programs designed using reactive
programming languages, based on formal mathematical
semantics, can be automatically verified for functional
correctness guarantees. Real-time guarantees on the
other hand are much harder to achieve. In this article
we provide a static analysis framework for guaranteeing
response times for reactive programs developed using
the Globally Asynchronous Locally Synchronous (GALS)
model of computation. The proposed approach is
applicable to scheduling of GALS programs for different
target architectures with single or multiple processors
or cores. A Satisfiability Modulo Theory (SMT)
formulation in the quantifier free linear real
arithmetic (QF\_LRA) logic is used for scheduling. A
novel technique to encode rendezvous used in
synchronization of globally asynchronous processes in
the presence of locally synchronous parallelism and
arbitrary preemption into QF\_LRA logic is presented.
Finally, our SMT formulation is shown to produce
schedules in reasonable time.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yi:2015:ESF,
author = "Qiuping Yi and Zijiang Yang and Jian Liu and Chen Zhao
and Chao Wang",
title = "Explaining Software Failures by Cascade Fault
Localization",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "41:1--41:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2738038",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/gnu.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "During software debugging, a significant amount of
effort is required for programmers to identify the root
cause of a manifested failure. In this article, we
propose a cascade fault localization method to help
speed up this labor-intensive process via a combination
of weakest precondition computation and constraint
solving. Our approach produces a cause tree, where each
node is a potential cause of the failure and each edge
represents a causal relationship between two causes.
There are two main contributions of this article that
differentiate our approach from existing methods.
First, our method systematically computes all potential
causes of a failure and augments each cause with a
proper context for ease of comprehension by the user.
Second, our method organizes the potential causes in a
tree structure to enable on-the-fly pruning based on
domain knowledge and feedback from the user. We have
implemented our new method in a software tool called
CaFL, which builds upon the LLVM compiler and KLEE
symbolic virtual machine. We have conducted experiments
on a large set of public benchmarks, including real
applications from GNU Coreutils and Busybox. Our
results show that in most cases the user has to examine
only a small fraction of the execution trace before
identifying the root cause of the failure.",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2015:SLO,
author = "Jong Chul Lee and Roman Lysecky",
title = "System-Level Observation Framework for Non-Intrusive
Runtime Monitoring of Embedded Systems",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "42:1--42:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2717310",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As the complexity of embedded systems rapidly
increases, the use of traditional analysis and debug
methods encounters significant challenges in
monitoring, analyzing, and debugging the complex
interactions of various software and hardware
components. This situation is further exacerbated for
in-situ debugging and verification in which traditional
debug and trace interfaces that require physical access
are unavailable, infeasible, or cost prohibitive. In
this article, we present a system-level observation
framework that provides minimally intrusive methods for
dynamically monitoring and analyzing deeply integrated
hardware and software components within embedded
systems. The system-level observation framework
monitors hardware and software events by inserting
additional logic for detecting designer-specified
events within hardware cores to observe complex
interaction across hardware and software boundaries at
runtime, and provides visibility for monitoring complex
execution behavior of software applications without
affecting the system execution.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhang:2015:LRR,
author = "Qi Zhang and Xuandong Li and Linzhang Wang and Tian
Zhang and Yi Wang and Zili Shao",
title = "{Lazy-RTGC}: a Real-Time Lazy Garbage Collection
Mechanism with Jointly Optimizing Average and Worst
Performance for {NAND} Flash Memory Storage Systems",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "43:1--43:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2746236",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Due to many attractive and unique properties, NAND
flash memory has been widely adopted in
mission-critical hard real-time systems and some soft
real-time systems. However, the nondeterministic
garbage collection operation in NAND flash memory makes
it difficult to predict the system response time of
each data request. This article presents Lazy-RTGC, a
real-time lazy garbage collection mechanism for NAND
flash memory storage systems. Lazy-RTGC adopts two
design optimization techniques: on-demand page-level
address mappings, and partial garbage collection.
On-demand page-level address mappings can achieve high
performance of address translation and can effectively
manage the flash space with the minimum RAM cost. On
the other hand, partial garbage collection can provide
the guaranteed system response time. By adopting these
techniques, Lazy-RTGC jointly optimizes both the
average and the worst system response time, and
provides a lower bound of reclaimed free space.
Lazy-RTGC is implemented in FlashSim and compared with
representative real-time NAND flash memory management
schemes. Experimental results show that our technique
can significantly improve both the average and worst
system performance with very low extra flash-space
requirements.",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sharma:2015:AIE,
author = "Namita Sharma and Preeti Ranjan Panda and Francky
Catthoor and Praveen Raghavan and Tom {Vander Aa}",
title = "Array Interleaving --- An Energy-Efficient Data Layout
Transformation",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "44:1--44:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2747875",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Optimizations related to memory accesses and data
storage make a significant difference to the
performance and energy of a wide range of
data-intensive applications. These techniques need to
evolve with modern architectures supporting wide memory
accesses. We investigate array interleaving, a data
layout transformation technique that achieves energy
efficiency by combining the storage of data elements
from multiple arrays in contiguous locations, in an
attempt to exploit spatial locality. The transformation
reduces the number of memory accesses by loading the
right set of data into vector registers, thereby
minimizing redundant memory fetches. We perform a
global analysis of array accesses, and account for
possibly different array behavior in different loop
nests that might ultimately lead to changes in data
layout decisions for the same array across program
regions. Our technique relies on detailed estimates of
the savings due to interleaving, and also the cost of
performing the actual data layout modifications. We
also account for the vector register widths and the
possibility of choosing the appropriate granularity for
interleaving. Experiments on several benchmarks show a
6--34\% reduction in memory energy due to the
strategy.",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Roy:2015:LAM,
author = "Sudip Roy and Partha P. Chakrabarti and Srijan Kumar
and Krishnendu Chakrabarty and Bhargab B.
Bhattacharya",
title = "Layout-Aware Mixture Preparation of Biochemical Fluids
on Application-Specific Digital Microfluidic Biochips",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "45:1--45:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2714562",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The recent proliferation of digital microfluidic (DMF)
biochips has enabled rapid on-chip implementation of
many biochemical laboratory assays or protocols. Sample
preprocessing, which includes dilution and mixing of
reagents, plays an important role in the preparation of
assays. The automation of sample preparation on a
digital microfluidic platform often mandates the
execution of a mixing algorithm, which determines a
sequence of droplet mix-split steps (usually
represented as a mixing graph). However, the overall
cost and performance of on-chip mixture preparation not
only depends on the mixing graph but also on the
resource allocation and scheduling strategy, for
instance, the placement of boundary reservoirs or
dispensers, mixer modules, storage units, and physical
design of droplet-routing pathways. In this article, we
first present a new mixing algorithm based on a
number-partitioning technique that determines a
layout-aware mixing tree corresponding to a given
target ratio of a number of fluids. The mixing graph
produced by the proposed method can be implemented on a
chip with a fewer number of crossovers among
droplet-routing paths as well as with a reduced
reservoir-to-mixer transportation distance. Second, we
propose a routing-aware resource-allocation scheme that
can be used to improve the performance of a given
mixing algorithm on a chip layout. The design
methodology is evaluated on various test cases to
demonstrate its effectiveness in mixture preparation
with the help of two representative mixing algorithms.
Simulation results show that on average, the proposed
scheme can reduce the number of crossovers among
droplet-routing paths by 89.7\% when used in
conjunction with the new mixing algorithm, and by
75.4\% when an earlier algorithm [Thies et al. 2008] is
used.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Suresh:2015:AGU,
author = "Chandra K. H. Suresh and Sule Ozev and Ozgur
Sinanoglu",
title = "Adaptive Generation of Unique {IDs} for Digital Chips
through Analog Excitation",
journal = j-TODAES,
volume = "20",
number = "3",
pages = "46:1--46:??",
month = jun,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2732408",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Aug 7 08:47:44 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Globalization of the integrated circuit design and
manufacturing flow has successfully ameliorated design
complexity and fabrication cost challenges, and helped
deliver cost-effective products while meeting stringent
time-to-market requirements. On the flip side, it has
resulted in various forms of security vulnerabilities
in the supply chain that involves designers, fabs, test
facilities, and distributors until the end-product
reaches customers. One of the biggest threats to
semiconductor industry today is the entry of aged,
reject, or cloned parts, that is, counterfeit chips,
into the supply chain, leading to annual revenue losses
in the order of billions of dollars. While traceability
of chips between trusted parties can help monitor the
supply chain at various points in the flow, existing
solutions are in the form of integrating costly
hardware units on chip, or utilizing easy-to-circumvent
inspection-based detection techniques. In this article,
we propose a technique for adaptive unique ID
generation that leverages process variations, enabling
chip traceability. The proposed method stimulates
digital chips with an analog signal from the supply
lines, which serve as primary inputs to each gate in
the signal path. Using a sinusoidal signal that
exercises the transistors as gain components, we create
a chip-specific response that can be post-processed
into a digital ID. The proposed technique enables quick
and cost-effective authenticity validation that
requires no on-chip hardware support. Our simulation
and experimentation on actual chips show that the
proposed technique is capable of generating unique IDs
even in the presence of environmental noise.",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2015:MBF,
author = "Hai-Bao Chen and Ying-Chi Li and Sheldon X.-D. Tan and
Xin Huang and Hai Wang and Ngai Wong",
title = "{$H$}-Matrix-Based Finite-Element-Based Thermal
Analysis for {$3$D} {ICs}",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "47:1--47:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2714563",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we propose an efficient
finite-element-based (FE-based) method for both steady
and transient thermal analyses of high-performance
integrated circuits based on the hierarchical matrix
($H$-matrix) representation. $H$-matrix has been shown to
provide a data-sparse way to approximate the matrices
and their inverses with almost linear-space and time
complexities. In this work, we apply the $H$-matrix
concept for solving heating diffusion problems modeled
by parabolic partial differential equations (PDEs)
based on the finite element method. We show that the
matrix from a FE-based steady and transient thermal
analysis can be represented by $H$-matrix without any
approximation, and its inverse and Cholesky factors can
be evaluated by $H$-matrix with controlled accuracy. We
then show and prove that the memory and time
complexities of the solver are bounded by $O(k_1 N
\log N)$ and $O(k_1^2 N \log^2 N)$, respectively,
where $k_1$ is a small quantity determined by accuracy
requirements and $N$ is the number of unknowns in the
system. The comparison with existing product-quality LU
solvers, CSPARSE and UMFPACK, on a number of 3D IC
thermal matrices, shows that the new method is much
more memory efficient than these methods, which however
prevents CPU time comparison with those methods on
large examples. But the proposed method can solve all
the given thermal circuits with decent scalabilities,
which shows good agreement with the predicted
theoretical results.",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Heyse:2015:TTM,
author = "Karel Heyse and Brahim {Al Farisi} and Karel Bruneel
and Dirk Stroobandt",
title = "{TCONMAP}: Technology Mapping for Parameterised {FPGA}
Configurations",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "48:1--48:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2751558",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Parameterised configurations are FPGA configuration
bitstreams in which the bits are defined as functions
of user-defined parameters. From a parameterised
configuration, it is possible to quickly and
efficiently derive specialised, regular configuration
bitstreams by evaluating these functions. The
specialised bitstreams have different properties and
functionality depending on the chosen values of the
parameters. The most important application of
parameterised configurations is the generation of
specialised configuration bitstreams for Dynamic
Circuit Specialisation, a technique for optimising
circuits at runtime using partial reconfiguration of
the FPGA. Generating and using parameterised
configurations requires a new FPGA tool flow. In this
article, we present a new technology mapping algorithm
for parameterised designs, called TCONMAP, that can be
used to produce parameterised configurations in which
both the configuration of the logic blocks and routing
is a function of the parameters. In our experiments, we
demonstrate that in using TCONMAP, the depth and area
of the mapped circuit is close to the minimal depth and
area attainable. Both Dynamic Circuit Specialisation
and fine-grained modular reconfiguration are extracted
by TCONMAP from the HDL description of the design
requiring only simple parameter annotations.",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Peter:2015:CBS,
author = "Steffen Peter and Tony Givargis",
title = "Component-Based Synthesis of Embedded Systems Using
Satisfiability Modulo Theories",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "49:1--49:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2746235",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Constraint programming solvers, such as Satisfiability
Modulo Theory (SMT) solvers, are capable tools in
finding preferable configurations for embedded systems
from large design spaces. However, constructing SMT
constraint programs is not trivial, in particular for
complex systems that exhibit multiple viewpoints and
models. In this article we propose CoDeL: a
component-based description language that allows system
designers to express components as reusable building
blocks of the system with their parameterizable
properties, models, and interconnectivity. Systems are
synthesized by allocating, connecting, and
parameterizing the components to satisfy the
requirements of an application. We present an algorithm
that transforms component-based design spaces,
expressible in CoDeL, to an SMT program, which, solved
by state-of-the-art SMT solvers, determines the
satisfiability of the synthesis problem, and delivers a
correct-by-construction system configuration.
Evaluation results for use cases in the domain of
scheduling and mapping of distributed real-time
processes confirm, first, the performance gain of SMT
compared to traditional design space exploration
approaches, second, the usability gains by expressing
design problems in CoDeL, and third, the capability of
the CoDeL/SMT approach to support the design of
embedded systems.",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mirtar:2015:AAA,
author = "Ali Mirtar and Sujit Dey and Anand Raghunathan",
title = "An Application Adaptation Approach to Mitigate the
Impact of Dynamic Thermal Management on Video
Encoding",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "50:1--50:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2753758",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Due to limitations of cooling methods such as using
fan and heat sink, dynamic thermal management (DTM) is
being widely adopted to manage the temperature of
computing systems. However, application of DTM can
reduce the system performance and thereby affect the
quality of real-time applications. Real-time video
encoding, which has high computational need and hard
deadlines, is a commonly used application that can be
severely affected by the usage of DTM. We study the
effect of DTM on a widely used H.264 video encoder and
formulate a multidimensional optimization problem to
maximize video quality and minimize bit rate while
ensuring that the video encoder can run in real time in
spite of DTM effects. We model the effects of adapting
encoding parameters on video quality, bit rate, and
encoder speed. We propose a dynamic application
adaptation method to efficiently solve the optimization
problem by optimally adapting the encoding parameters
in response to DTM effects. In addition, we show that
the proposed dynamic application adaptation method
would reduce the need for cooling methods such as
forced convection cooling. We implement the proposed
approach on an Intel\reg{} Core\TM{} 2 Duo platform where
dynamic voltage and frequency scaling (DVFS) is used
for DTM. Our measurements with several videos reveal
that when DTM is applied, the video quality is affected
significantly. However, using the proposed adaptation
algorithm, the encoder can run in real time, and the
quality loss is minimized with only a marginal increase
in the bit rate.",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2015:VPI,
author = "Da-Wei Chang and Hsin-Hung Chen and Wei-Jian Su",
title = "{VSSD}: Performance Isolation in a Solid-State Drive",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "51:1--51:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2755560",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Performance isolation is critical in shared storage
systems, a popular storage solution. In a shared
storage system, interference between requests from
different users can affect the accuracy of I/O cost
accounting, resulting in poor performance isolation.
Recently, NAND flash-memory-based solid-state drives
(SSDs) have been increasingly used in shared storage
systems. However, interference in SSD-based shared
storage systems has not been addressed. In this
article, two types of interference, namely, queuing
delay (QD) interference and garbage collection (GC)
interference, are identified in a shared SSD.
Additionally, a framework called VSSD is proposed to
address these types of interference. VSSD is composed
of two components: the FACO credit-based I/O scheduler
designed to address QD interference and the ViSA flash
translation layer designed to address GC interference.
The VSSD framework aims to be implemented in the
firmware running on an SSD controller. With VSSD,
interference in an SSD can be eliminated and
performance isolation can be ensured. Both synthetic
and application workloads are used to evaluate the
effectiveness of the proposed VSSD framework. The
performance results show the following. First, QD and
GC interference exists and can result in poor
performance isolation between users on SSD-based shared
storage systems. Second, VSSD is effective in
eliminating the interference and achieving performance
isolation between users. Third, the overhead of VSSD is
insignificant.",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Duan:2015:AAP,
author = "Qing Duan and Abhishek Koneru and Jun Zeng and
Krishnendu Chakrabarty and Gary Dispoto",
title = "Accurate Analysis and Prediction of Enterprise
Service-Level Performance",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "52:1--52:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2757279",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "An enterprise service-level performance time series is
a sequence of data points that quantify demand,
throughput, average order-delivery time, quality of
service, or end-to-end cost. Analytical and predictive
models of such time series can be embedded into an
enterprise information system (EIS) in order to provide
meaningful insights into potential business problems
and generate guidance for appropriate solutions.
Time-series analysis includes periodicity detection,
decomposition, and correlation analysis. Time-series
prediction can be modeled as a regression problem to
forecast a sequence of future time-series datapoints
based on the given time series. The state-of-the-art
(baseline) methods employed in time-series prediction
generally apply advanced machine-learning algorithms.
In this article, we propose a new univariate method for
dealing with midterm time-series prediction. The
proposed method first analyzes the hierarchical
periodic structure in one time series and decomposes it
into trend, season, and noise components. By discarding
the noise component, the proposed method only focuses
on predicting repetitive season and smoothed trend
components. As a result, this method significantly
improves upon the performance of baseline methods in
midterm time-series prediction. Moreover, we propose a
new multivariate method for dealing with short-term
time-series prediction. The proposed method utilizes
cross-correlation information derived from multiple
time series. The amount of data taken from each time
series for training the regression model is determined
by results from hierarchical cross-correlation
analysis. Such a data-filtering strategy leads to
improved algorithm efficiency and prediction accuracy.
By combining statistical methods with advanced
machine-learning algorithms, we have achieved a
significantly superior performance in both short-term
and midterm time-series predictions compared to
state-of-the-art (baseline) methods.",
acknowledgement = ack-nhfb,
articleno = "52",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Heo:2015:IAS,
author = "Ingoo Heo and Minsu Kim and Yongje Lee and Changho
Choi and Jinyong Lee and Brent Byunghoon Kang and
Yunheung Paek",
title = "Implementing an Application-Specific Instruction-Set
Processor for System-Level Dynamic Program Analysis
Engines",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "53:1--53:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2746238",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In recent years, dynamic program analysis (DPA) has
been widely used in various fields such as profiling,
finding bugs, and security. However, existing solutions
have their own weaknesses. Software solutions provide
flexibility in DPA but they suffer from tremendous
performance overhead. In contrast, core-level hardware
engines rely on specialized integrated logics and
attain extremely fast computation, but they have a
limited functional extensibility because the logics are
tightly coupled with the host processor. To mend this,
a prior system-level approach utilizes an existing
channel to integrate their hardware without
necessitating the host architecture modification and
introduced great potential in performance.
Nevertheless, the prior work does not address the
detailed design and implementation of the engine, which
is quite essential to leverage the deployment on real
systems. To address this, in this article, we propose
an implementation of programmable DPA hardware engine,
called program analysis unit (PAU). PAU is an
application-specific instruction-set processor (ASIP)
whose instruction set is customized to reflect common
features of various DPA methods. With the specialized
architecture and programmability of software, our PAU
aims at fast computation and sufficient flexibility. In
our case studies on several DPA techniques, we show
that our ASIP approach can be successfully applicable
to complex DPA schemes while providing hardware-backed
power in performance and software-based flexibility in
analysis. Recent experiments on our FPGA prototype
revealed that the performance of PAU is 4.7--13.6 times
faster than pure software DPA, and the power/area
consumption is also acceptably small compared to
today's mobile processors.",
acknowledgement = ack-nhfb,
articleno = "53",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jiang:2015:CLF,
author = "Lei Jiang and Bo Zhao and Jun Yang and Youtao Zhang",
title = "Constructing Large and Fast On-Chip Cache for Mobile
Processors with Multilevel Cell {STT--MRAM}
Technology",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "54:1--54:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2764903",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Modern mobile processors integrating an increasing
number of cores into one single chip demand
large-capacity, on-chip, last-level caches (LLCs) in
order to achieve scalable performance improvements.
However, adopting traditional memory technologies such
as SRAM and embedded DRAM (eDRAM) brings leakage and
scalability problems. Spin-transfer torque magnetic RAM
(STT-MRAM) is a novel nonvolatile memory technology
that has emerged as a promising alternative for
constructing on-chip caches in high-end mobile
processors. STT-MRAM has many advantages, such as short
read latency, zero leakage from the memory cell, and
better scalability than eDRAM and SRAM. Multilevel cell
(MLC) STT-MRAM further enlarges capacity and reduces
per-bit cost by storing more bits in one cell. However,
MLC STT-MRAM has long write latency which limits the
effectiveness of MLC STT-MRAM-based LLCs. In this
article, we address this limitation with three novel
designs: line pairing (LP), line swapping (LS), and
dynamic LP/LS enabler (DLE). LP forms fast cache lines
by reorganizing MLC soft bits which are faster to
write. LS dynamically stores frequently-written data
into these fast cache lines. We then propose a dynamic
LP/LS enabler (DLE) to enable LP and LS only if they
help to improve the overall cache performance. Our
experimental results show that the proposed designs
improve system performance by 9--15\% and reduce energy
consumption by 14--21\% for various types of mobile
processors.",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Samavatian:2015:ALL,
author = "Mohammad Hossein Samavatian and Mohammad Arjomand and
Ramin Bashizade and Hamid Sarbazi-Azad",
title = "Architecting the Last-Level Cache for {GPUs} using
{STT}-{RAM} Technology",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "55:1--55:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2764905",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Future GPUs should have larger L2 caches based on the
current trends in VLSI technology and GPU architectures
toward increase of processing core count. Larger L2
caches inevitably have proportionally larger power
consumption. In this article, having investigated the
behavior of GPGPU applications, we present an efficient
L2 cache architecture for GPUs based on STT-RAM
technology. Due to its high-density and low-power
characteristics, STT-RAM technology can be utilized in
GPUs where numerous cores leave a limited area for
on-chip memory banks. They have, however, two important
issues, high energy and latency of write operations,
that have to be addressed. Low retention time STT-RAMs
can reduce the energy and delay of write operations.
Nevertheless, employing STT-RAMs with low retention
time in GPUs requires a thorough study on the behavior
of GPGPU applications. Based on this investigation, we
have architectured a two-part STT-RAM-based L2 cache
with low-retention (LR) and high-retention (HR) parts.
The proposed two-part L2 cache exploits a dynamic
threshold regulator (DTR) to efficiently regulate the
write threshold for migration of the data blocks from
HR to LR, based on the behavior of the applications.
Also, a Data and Access type Aware Cache Search
mechanism (DAACS) is hired for handling the search of
the requested data blocks in two parts of the cache.
The STT-RAM L2 cache architecture proposed in this
article can improve IPC by up to 171\% (20\% on
average), and reduce the average consumed power by
28.9\% compared to a conventional L2 cache architecture
with equal on-chip area.",
acknowledgement = ack-nhfb,
articleno = "55",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Indrusiak:2015:FSN,
author = "Leandro Soares Indrusiak and James Harbin and Osmar
{Marchi Dos Santos}",
title = "Fast Simulation of Networks-on-Chip with
Priority-Preemptive Arbitration",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "56:1--56:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2755559",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "An increasingly time-consuming part of the design flow
of on-chip multiprocessors is the simulation of the
interconnect architecture. The accurate simulation of
state-of-the-art network-on-chip interconnects can take
hours, and this process is repeated for each design
iteration because it provides valuable insights on
communication latencies that can greatly affect the
overall performance of the system. In this article, we
identify a time-predictable network-on-chip
architecture and show that its timing behaviour can be
predicted using models which are far less complex than
the architecture itself. We then explore such a feature
to produce simplified and lightweight simulation models
that can produce latency figures with more than 90\%
accuracy and simulate more than 1,000 times faster when
compared to a cycle-accurate model of the same
interconnect.",
acknowledgement = ack-nhfb,
articleno = "56",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2015:FES,
author = "Irith Pomeranz",
title = "{FOLD}: Extreme Static Test Compaction by Folding of
Functional Test Sequences",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "57:1--57:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2764455",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article introduces a new approach to extreme
static test compaction for functional test sequences
that modifies the sequence in order to enhance the
ability to omit test vectors from it and thus compact
it. In the new approach, modification of the sequence
and omission of test vectors from it are tightly
coupled by focusing both subprocedures on subsequences
of limited lengths. In a new process that is referred
to as folding, a subsequence is partitioned into two
halves, and the goal of the modification is to ensure
that the two halves are as similar as possible. With
similar halves, the expectation is that it will be
possible to omit test vectors from the subsequence.
Experimental results demonstrate that the procedure
produces extremely short functional test sequences for
benchmark circuits.",
acknowledgement = ack-nhfb,
articleno = "57",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2015:BST,
author = "Ran Wang and Krishnendu Chakrabarty and Sudipta
Bhawmik",
title = "Built-In Self-Test and Test Scheduling for
Interposer-Based {$ 2.5 $D IC}",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "58:1--58:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2757278",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Interposer-based 2.5D integrated circuits (ICs) are
seen today as a precursor to 3D ICs based on
through-silicon vias (TSVs). All the dies and the
interposer in a 2.5D IC must be adequately tested for
product qualification. We present an efficient built-in
self-test (BIST) architecture for targeting defects in
dies and in the interposer interconnects. The proposed
BIST architecture can also be used for fault diagnosis
during interconnect testing. To reduce the overall test
cost, we describe a test scheduling and optimization
technique under power constraints. We present
simulation results to validate the BIST architecture
and demonstrate fault detection, synthesis results to
evaluate the area overhead of the proposed BIST
architecture, and test scheduling results to highlight
the effectiveness of the optimization approach.",
acknowledgement = ack-nhfb,
articleno = "58",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bahar:2015:ISI,
author = "R. Iris Bahar and Alex K. Jones and Yuan Xie",
title = "Introduction to the Special Issue on Reliable,
Resilient, and Robust Design of Circuits and Systems",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "59:1--59:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2796541",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "59",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kiddie:2015:SEM,
author = "Bradley T. Kiddie and William H. Robinson and Daniel
B. Limbrick",
title = "Single-Event Multiple-Transient Characterization and
Mitigation via Alternative Standard Cell Placement
Methods",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "60:1--60:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2740962",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As fabrication technology scales towards smaller
transistor sizes and lower critical charge,
single-event radiation effects are more likely to cause
errant behavior in multiple, physically adjacent
devices in modern integrated circuits (ICs), and with
higher operating frequencies, this risk increasingly
impacts design logic over memory as well. In order to
increase future system reliability, circuit designers
need greater awareness of multiple-transient
charge-sharing effects during the early stages of their
design flow with standard cell placement and routing.
To measure the propagation and observability of
multiple transients from single radiation events, this
work uses several intra-pipeline combinational logic
circuits at the 32nm technology node, investigates
several different standard cell placements of each
design, and analyzes those placements with a novel,
physically realistic transient injection and simulation
method. It is shown that (1) this simulation
methodology, informed by experimental data, provides an
increased realism over other works in traditional fault
injection fields, (2) different placements of the same
circuit where standard cells are grouped by logical
hierarchy can result in different reliability behavior
and benefits especially useful within the area of
approximate computing, and (3) improved reliability
through charge-sharing transient mitigation can be
gained with no area penalty and minimal speed and power
penalties by adjusting the placement of standard
cells.",
acknowledgement = ack-nhfb,
articleno = "60",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Delshadtehrani:2015:SMR,
author = "Leila Delshadtehrani and Hamed Farbeh and Seyed
Ghassem Miremadi",
title = "In-Scratchpad Memory Replication: Protecting
Scratchpad Memories in Multicore Embedded Systems
against Soft Errors",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "61:1--61:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2770874",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Scratchpad memories (SPMs) are widely employed in
multicore embedded processors. Reliability is one of
the major constraints in the embedded processor design,
which is threatened with the increasing susceptibility
of memory cells to multiple-bit upsets (MBUs) due to
continuous technology down-scaling. This article
proposes a low-cost and efficient data replication
mechanism, called In-Scratchpad Memory Replication
(ISMR), to correct MBUs in SPMs of multicore embedded
processors. The main feature of ISMR is a smart
controller, called Replication Management Unit (RMU),
which is responsible for dynamically analyzing the
activity of the SPM blocks at runtime and efficiently
replicating the vulnerable SPM blocks into currently
inactive SPM blocks. RMU exploits a 2-bit tag for each
SPM block, where the value of each tag is determined by
RMU according to the SPM access pattern. Accordingly,
the proposed mechanism guarantees the replication of
all vulnerable SPM blocks to provide error correction
without decreasing the SPM utilization. To detect
errors in SPM blocks, ISMR uses a 2-bit
interleaved-parity code. As compared with the previous
E-RAID 1 mechanism, the simulation results illustrate
that for an 8-core embedded processor, the ISMR
mechanism experiences 81\% less energy consumption
overhead and 48\% less performance overhead.",
acknowledgement = ack-nhfb,
articleno = "61",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Papandreou:2015:ERM,
author = "Nikolaos Papandreou and Thomas Parnell and Haralampos
Pozidis and Thomas Mittelholzer and Evangelos
Eleftheriou and Charles Camp and Thomas Griffin and
Gary Tressler and Andrew Walls",
title = "Enhancing the Reliability of {MLC NAND} Flash Memory
Systems by Read Channel Optimization",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "62:1--62:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2699866",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "NAND flash memory is not only the ubiquitous storage
medium in consumer applications but has also started to
appear in enterprise storage systems as well. MLC and
TLC flash technology made it possible to store multiple
bits in the same silicon area as SLC, thus reducing the
cost per amount of data stored. However, at current
sub-20nm technology nodes, MLC flash devices fail to
provide the levels of raw reliability, mainly cycling
endurance, that are required by typical enterprise
applications. Advanced signal processing and coding
schemes are needed to improve the flash bit error rate
and thus elevate the device reliability to the desired
level. In this article, we report on the use of
adaptive voltage thresholds and cell-to-cell
interference cancellation in the read operation of NAND
flash devices. We discuss how the optimal read voltage
thresholds can be determined and assess the benefit of
cancelling cell-to-cell interference in terms of
cycling endurance, data retention, and resilience to
read disturb.",
acknowledgement = ack-nhfb,
articleno = "62",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xu:2015:ICF,
author = "Cong Xu and Dimin Niu and Yang Zheng and Shimeng Yu
and Yuan Xie",
title = "Impact of Cell Failure on Reliable Cross-Point
Resistive Memory Design",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "63:1--63:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2753759",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Resistive random access memory (ReRAM) technology is
an emerging candidate for next-generation nonvolatile
memory (NVM) architecture due to its simple structure,
low programming voltage, fast switching speed, high
on/off ratio, excellent scalability, good endurance,
and great compatibility with silicon CMOS technology.
The most attractive of the characteristics of ReRAM is
its cross-point structure, which features a $ 4 F^2 $
cell size. In a cross-point structure, the existence of
sneak current and resulting voltage loss due to the
wire's resistance might cause read and write failures
if not designed properly. In addition, a robust ReRAM
design needs to deal with both soft and hard errors. In
this article, we summarize mechanisms of both soft and
hard errors of ReRAM cells and propose a unified model
to characterize different failure behaviors. We
quantitatively analyze the impact of cell failure types
on the reliability of the cross-point array. We also
propose an error-resilient architecture, which avoids
unnecessary writes in the hard error detection unit.
Assuming constant soft error rate, our approach can
extend the lifetime of ReRAM up to 75\% over a design
without hard error detection and up to 12\% over the
design with a ``write-verify'' detection mechanism. Our
approach yields greater significant lifetime
improvement when considering postcycling retention
degradation.",
acknowledgement = ack-nhfb,
articleno = "63",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhang:2015:RLP,
author = "Renyuan Zhang and Mineo Kaneko",
title = "Robust and Low-Power Digitally Programmable Delay
Element Designs Employing Neuron-{MOS} Mechanism",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "64:1--64:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2740963",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The feasibility of designing digitally programmable
delay elements (PDEs) employing neuron-MOS mechanism is
investigated in this work. By coupling the capacitors
on the gate of the MOS transistor, the current flowing
through the transistor can be digitally tuned without
additional static power consumption. Various switching
delays are generated by a clock buffer stage in this
manner. Two types of neuron-MOS-based PDEs are
suggested in this article. One of them is realized by
directly applying capacitor-coupling technology on the
transistors of an inverter as a clock buffer. The delay
programmability is realized by tuning the
charging/discharging current through the neuron-MOS
inverter digitally. Since no additional transistor is
introduced into the charging/discharging path, the
performance fluctuation due to process variations on
MOS transistors is reduced. The temperature effect is
also partially compensated by the proposed neuron-MOS
implementation. Another type of PDE circuit is proposed
by employing a reliable reference-current-generator,
where the neuron-MOS transistor acts as a linearly
tunable resistance. A stable reference current is
generated and used for charging/discharging the
inverter as a clock buffer. As a result, the switching
delay of the inverter is linearly programmed by digital
input patterns. In general, both types of suggested PDE
circuits achieve improved or fair performances over the
robustness, power consumption, and linearity.",
acknowledgement = ack-nhfb,
articleno = "64",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2015:UIL,
author = "Hyungjun Kim and Siva Bhanu Krishna Boga and Arseniy
Vitkovskiy and Stavros Hadjitheophanous and Paul V.
Gratz and Vassos Soteriou and Maria K. Michael",
title = "Use It or Lose It: Proactive, Deterministic Longevity
in Future Chip Multiprocessors",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "65:1--65:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2770873",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Moore's Law scaling continues to yield higher
transistor density with each succeeding process
generation, leading to today's many-core chip
multiprocessors (CMPs) with tens or even hundreds of
interconnected cores or tiles. Unfortunately, deep
submicron CMOS process technology is marred by
increasing susceptibility to wear. Prolonged
operational stress gives rise to accelerated wearout
and failure due to several physical failure mechanisms,
including hot-carrier injection (HCI) and negative-bias
temperature instability (NBTI). Each failure mechanism
correlates with different usage-based stresses, all of
which can eventually generate permanent faults. While
the wearout of an individual core in many-core CMPs may
not necessarily be catastrophic, a single fault in the
interprocessor network-on-chip (NoC) fabric could
render the entire chip useless, as it could lead to
protocol-level deadlocks, or even partition away vital
components such as the memory controller or other
critical I/O. In this article, we study HCI- and
NBTI-induced wear due to actual stresses caused by real
workloads, applied onto the interconnect
microarchitecture and develop a critical path model for
NBTI-induced wearout. A key finding of this modeling is
that, counter to prevailing wisdom, wearout in the
CMP's on-chip interconnect is correlated with lack of
load observed in the NoC routers rather than high load.
We then develop a novel wearout-decelerating scheme in
which routers under low load have their wear-sensitive
components exercised without significantly impacting
cycle time, pipeline depth, area, or power consumption
of the overall router. A novel deterministic approach
is proposed for the generation of appropriate
exercise-mode data, ensuring design parameter targets
are met. We subsequently show that the proposed design
yields an $\approx$2,300$ \times $ decrease in the rate
of wear.",
acknowledgement = ack-nhfb,
articleno = "65",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kahng:2015:IMR,
author = "Andrew B. Kahng and Seokhyeong Kang and Jiajia Li and
Jose {Pineda De Gyvez}",
title = "An Improved Methodology for Resilient Design
Implementation",
journal = j-TODAES,
volume = "20",
number = "4",
pages = "66:1--66:??",
month = sep,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2749462",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Sep 29 08:53:54 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Resilient design techniques are used to (i) ensure
correct operation under dynamic variations and to (ii)
improve design performance (e.g., timing speculation).
However, significant overheads (e.g., 16\% and 14\%
energy penalties due to throughput degradation and
additional circuits) are incurred by existing resilient
design techniques. For instance, resilient designs
require additional circuits to detect and correct
timing errors. Further, when there is an error, the
additional cycles needed to restore a previous correct
state degrade throughput, which diminishes the
performance benefit of using resilient designs. In this
work, we describe an improved methodology for resilient
design implementation to minimize the costs of
resilience in terms of power, area, and throughput
degradation. Our methodology uses two levers:
selective-endpoint optimization (i.e.,
sensitivity-based margin insertion) and clock skew
optimization. We integrate the two optimization
techniques in an iterative optimization flow which
comprehends toggle rate information and the trade-off
between cost of resilience and margin on combinational
paths. Since the error-detection network can result in
up to 9\% additional wirelength cost, we also propose a
matching-based algorithm for construction of the
error-detection network to minimize this resilience
overhead. Further, our implementations comprehend the
impacts of signoff corners (in particular, hold
constraints, and use of typical vs. slow libraries) and
process variation, which are typically omitted in
previous studies of resilience trade-offs. Our proposed
flow achieves energy reductions of up to 21\% and 10\%
compared to a conventional (with only margin used to
attain robustness) design and a brute-force
implementation (i.e., a typical resilient design, where
resilient endpoints are (greedily) instantiated at
timing-critical endpoints), respectively. We show that
these benefits increase in the context of an adaptive
voltage scaling strategy.",
acknowledgement = ack-nhfb,
articleno = "66",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Roy:2015:FTE,
author = "Debashri Roy and Prasun Ghosal and Saraju Mohanty",
title = "{FuzzRoute}: a Thermally Efficient Congestion-Free
Global Routing Method for Three-Dimensional Integrated
Circuits",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "1:1--1:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2767127",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The high density of interconnects, closer proximity of
modules, and routing phase are pivotal during the
layout of a performance-centric three-dimensional
integrated circuit (3D IC). Heuristic-based approaches
are typically used to handle such NP-complete problems
of global routing in 3D ICs. To overcome the inherent
limitations of deterministic approaches, a novel
methodology for multi-objective global routing based on
fuzzy logic has been proposed in this article. The
guiding information generated after the placement phase
is used during routing with the help of a fuzzy expert
system to achieve thermally efficient and
congestion-free routing. A complete global routing
solution is designed based on the proposed algorithms
and the results are compared with selected fully
established global routers, namely Labyrinth,
FastRoute3.0, NTHU-R, BoxRouter 2.0, FGR,
NTHU-Route2.0, FastRoute4.0, NCTU-GR, MGR, and
NCTU-GR2.0. Experiments are performed over ISPD 1998
and 2008 benchmarks. The proposed router, called
FuzzRoute, achieves balanced superiority in terms of
routability, runtime, and wirelength over others. The
improvements on routing time for Labyrinth, BoxRouter
2.0, and FGR are 91.81\%, 86.87\%, and 32.16\%,
respectively, for ISPD 1998 benchmarks. It may be noted
that, though FastRoute3.0 achieves fastest runtime, it
fails to generate congestion-free solutions for all
benchmarks, which is overcome by the proposed FuzzRoute
of the current article. It also shows wirelength
improvements of 17.35\%, 2.88\%, 2.44\%, 2.83\%, and
2.10\%, respectively, over others for ISPD 1998
benchmarks. For ISPD 2008 benchmark circuits it also
provides 2.5\%, 2.6\%, 1\%, 1.1\%, and 0.3\% lesser
wirelength and averagely runs 1.68$ \times $, 6.42$
\times $, 2.21$ \times $, 0.76$ \times $, and 1.54$
\times $ faster than NTHU-Route2.0, FastRoute4.0,
NCTU-GR, MGR, and NCTU-GR2.0, respectively.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhang:2015:LDP,
author = "Ye Zhang and Wai-Shing Luk and Yunfeng Yang and Hai
Zhou and Changhao Yan and David Z. Pan and Xuan Zeng",
title = "Layout Decomposition with Pairwise Coloring and
Adaptive Multi-Start for Triple Patterning
Lithography",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "2:1--2:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2764904",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article we present a pairwise coloring (PWC)
approach to tackle the layout decomposition problem for
triple patterning lithography (TPL). The main idea is
to reduce the problem to a set of bi-coloring problems.
The overall solution is refined by applying a
bi-coloring method for pairs of color sets per pass.
One obvious advantage of this method is that the
existing double patterning lithography (DPL) techniques
can be reused effortlessly. Moreover, we observe that
each pass can be fulfilled efficiently by integrating
an SPQR-tree-graph-division-based bi-coloring method.
In addition, to prevent the solution getting stuck in
the local minima, an adaptive multi-start (AMS)
approach is incorporated. Adaptive starting points are
generated according to the vote of previous solutions.
The experimental results show that our method is
competitive with other works on both solution quality
and runtime performance.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2015:DMD,
author = "Hu Chen and Sanghamitra Roy and Koushik Chakraborty",
title = "{DARP-MP}: Dynamically Adaptable Resilient Pipeline
Design in Multicore Processors",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "3:1--3:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2755558",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we demonstrate that the sensitized
path delays in various microprocessor pipe stages
exhibit intriguing temporal and spatial variations
during the execution of real-world applications. To
effectively exploit these delay variations, we propose
dynamically adaptable resilient pipeline (DARP)---a
series of runtime techniques to boost power-performance
efficiency and fault tolerance in a pipelined
microprocessor. DARP employs early error prediction to
avoid a major portion of the timing errors. We combine
DARP with the state-of-art topologically homogeneous
and power-performance heterogeneous (THPH) architecture
to build up a new frontier for the energy efficiency of
multicore processors (DARP-MP). Using a rigorous
circuit-architectural infrastructure, we demonstrate
that DARP substantially improves the multicore
processor performance (9.4--20\%) and energy efficiency
(10--28.6\%) compared to state-of-the-art techniques.
The energy-efficiency improvements of DARP-MP are 42\%
and 49.9\% compared against the original THPH and
another state-of-art multicore power management scheme,
respectively.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2015:MMS,
author = "Myungsun Kim and Jinkyu Koo and Hyojung Lee and James
R. Geraci",
title = "Memory Management Scheme to Improve Utilization
Efficiency and Provide Fast Contiguous Allocation
without a Statically Reserved Area",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "4:1--4:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2770871",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Fast allocation of large blocks of physically
contiguous memory plays a crucial role to boost the
performance of multimedia applications in modern
memory-constrained portable devices, such as
smartphones, tablets, etc. Existing systems have
addressed this issue by provisioning a large statically
reserved memory area (SRA) in which only dedicated
applications can allocate pages. However, this in turn
degrades the performance of applications that are
prohibited to utilize the SRA due to the reduced
available memory pool. To overcome this drawback while
maintaining the benefits of the SRA, we propose a new
memory management scheme that uses a special memory
region, called page-cache-preferred area (PCPA), in
concert with a quick memory reclaiming algorithm. The
key of the proposed scheme is to enhance the memory
utilization efficiency by enabling to allocate
page-cached pages of all applications in the PCPA until
predetermined applications require to allocate big
chunks of contiguous memory. At this point, clean
page-cached pages in the PCPA are rapidly evicted
without write-back to a secondary storage. Compared to
the SRA scheme, experimental results show that the
average launch time of real-world applications and the
execution time of I/O-intensive benchmarks are reduced
by 9.2\% and 24.7\%, respectively.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Oboril:2015:EIS,
author = "Fabian Oboril and Mehdi B. Tahoori",
title = "Exploiting Instruction Set Encoding for Aging-Aware
Microprocessor Design",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "5:1--5:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2783435",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Microprocessors fabricated at nanoscale nodes are
exposed to accelerated transistor aging due to bias
temperature instability and hot carrier injection. As a
result, device delays increase over time, reducing the
mean time to failure (MTTF) and hence lifetime of the
processor. To address this challenge, many
(micro)-architectural techniques target the execution
stage of the instruction pipeline, as this one is
typically most critical. However, also the decoding
stages can become aging critical and limit the
microprocessor lifetime, as we will show in this work.
Therefore, we propose a novel aging-aware instruction
set-encoding methodology (ArISE) that improves the
instruction encoding iteratively using a heuristic
algorithm. In addition, the switching activities of the
affected memory elements are considered in order to
co-optimize lifetime and energy efficiency. Our
experimental results show that MTTF of the decoding
stages can be improved by 2.3$ \times $ with negligible
implementation costs.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{More:2015:LAN,
author = "Ankit More and Baris Taskin",
title = "Locality-Aware Network Utilization Balancing in
{NoCs}",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "6:1--6:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2743012",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Hierarchical and multi-network networks-on-chip (NoCs)
have been proposed in the literature to improve the
energy- and performance-efficient scalability of the
traditional flat-mesh NoC architecture. Theoretically,
based on a small-world network-based analysis,
traditional hierarchical NoCs are expected to provide
good scalability. However, the traditional theoretical
analysis (e.g., for small-worldness) does not take into
account the congestion phenomenon experienced in such
networks. Counterintuitively, as shown in this work,
breaking the hierarchy in traditional hierarchical NoCs
and utilizing the proposed locality-aware network
utilization (NU) balancing technique performs better.
This improvement in performance is observed through
experimental analysis, which is contrasted with the
theoretical analysis that does not account for
congestion. In addition to the novelties for
hierarchical networks, the application of the proposed
locality-aware NU balancing scheme is extended to
multi-network NoC topologies (with already separated
networks). Results of the analysis show the superiority
of applying the locality-aware NU balancing technique
for a throughput and energy-efficient scaling of the
multi-network NoC architectures, much like those of the
hierarchical NoCs. For instance, for a NoC with 1024
nodes, the proposed NU balancing technique provides up
to 95\% higher throughput efficiency and consumes up to
29\% less energy per flit compared to the best NoC
topology without the NU balancing technique. The
analysis also helps to render the choice of a NoC
topology for traffic patterns varying in locality and
nonlocality on exascale computing CMPs.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cheng:2015:ABW,
author = "Hsiang-Yun Cheng and Mary Jane Irwin and Yuan Xie",
title = "Adaptive Burst-Writes {(ABW)}: Memory Requests
Scheduling to Reduce Write-Induced Interference",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "7:1--7:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2753757",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Main memory latencies have become a major performance
bottleneck for chip-multiprocessors (CMPs). Since reads
are on the critical path, existing memory controllers
prioritize reads over writes. However, writes must be
eventually processed when the write queue is full.
These writes are serviced in a burst to reduce the bus
turnaround delay and increase the row-buffer locality.
Unfortunately, a large number of reads may suffer long
queuing delay when the burst-writes are serviced. The
long write latency of future nonvolatile memory will
further exacerbate the long queuing delay of reads
during burst-writes. In this article, we propose a
run-time mechanism, Adaptive Burst-Writes (ABW), to
reduce the queuing delay of reads. Based on the
row-buffer hit rate of writes and the arrival rate of
reads, we dynamically control the number of writes
serviced in a burst to trade off the write service time
and the queuing latency of reads. For prompt
adjustment, our history-based mechanism further
terminates the burst-writes earlier when the row-buffer
hit rate of writes in the previous burst-writes is low.
As a result, our policy improves system throughput by
up to 28\% (average 10\%) and 43\% (average 14\%) in
CMPs with DRAM-based and PCM-based main memory.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ochoa-Ruiz:2015:MAR,
author = "Gilberto Ochoa-Ruiz and S{\'e}bastien Guillet and
Florent {De Lamotte} and Eric Rutten and El-Bay
Bourennane and Jean-Philippe Diguet and Guy Gogniat",
title = "An {MDE} Approach for Rapid Prototyping and
Implementation of Dynamic Reconfigurable Systems",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "8:1--8:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2800784",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article presents a co-design methodology based on
RecoMARTE, an extension to the well-known UML MARTE
profile, which is used for the specification and
automatic generation of Dynamic and Partially
Reconfigurable Systems-on-Chip (DRSoC). This endeavor
is part of a larger framework in which Model-Driven
Engineering (MDE) techniques are extensively used for
modeling and via model transformations, generating
executable models, which are exploited by
implementation tools to create reconfigurable systems.
More specifically, the methodological aspects presented
in this article are concerned with expediting the
conception and implementation of the hardware platform
and the integration of a correct-by-construction
reconfiguration controller. This article builds upon
previous research by integrating previously separated
endeavors to obtain a complete PR system generation
chain, which aims at shielding the designer of many of
the burdensome technological and tool-specific
requirements. The methodology permits for the
verification of the platform description at different
stages in the development process (i.e., HDL for
simulation, static FPGA implementation, controller
simulation and verification). Furthermore, automation
capabilities embedded in the flow enable the generation
of the platform description and the integration of the
reconfiguration controller executive seamlessly. In
order to demonstrate the benefits of the proposed
approach, we present a case study in which we target
the creation of an image-processing application to be
deployed onto an FPGA board. We present the required
modeling strategies and we discuss how the generation
chains are integrated with the back-end Xilinx tools
(the most mature version of PR technology) to produce
the necessary executable artifacts: VHDL for the
platform description and a C description of the
reconfiguration controller to be executed by an
embedded processor.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2015:CPM,
author = "Shih-Hsu Huang and Hua-Hsin Yeh and Yow-Tyng Nieh",
title = "Clock Period Minimization with Minimum Leakage Power",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "9:1--9:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2778954",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In the design of nonzero clock skew circuits, an
increase of the short-path delay may improve circuit
speed or reduce leakage power. However, the impact of
increasing the short-path delay on the trade-off
between circuit speed and leakage power has not been
well studied. An analysis of previous works shows that
they can be classified into two independent groups. One
group uses extra buffers to increase the short-path
delay for achieving the lower bound of the clock
period; however, this group has a large overhead of
leakage power. The other group uses the combination of
threshold voltage assignment and gate sizing (TVA/GS)
to increase the short-path delay as possible for
reducing leakage power; however, this group often does
not work with the lower bound of the clock period.
Accordingly, this article considers the simultaneous
application of buffer insertion and TVA/GS during clock
skew scheduling. Our objective is to minimize the
leakage power for working with the lower bound of the
clock period. To the best of our knowledge, our
approach is the first leakage-power-aware clock skew
scheduling that guarantees working with the lower bound
of the clock period. Benchmark data consistently show
that our approach achieves good results in terms of
both the circuit speed and the leakage power.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Subramaniam:2015:FPM,
author = "Anupama R. Subramaniam and Janet Roveda and Yu Cao",
title = "A Finite-Point Method for Efficient Gate
Characterization Under Multiple Input Switching",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "10:1--10:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2778970",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Timing characterization of standard cells is one of
the essential steps in VLSI design. The traditional
static timing analysis (STA) tool assumes single input
switching models for the characterization of multiple
input gates. However, due to technology scaling,
increasing operating frequency, and process variation,
the probability of the occurrence of multiple input
switching (MIS) is increasing. On the other hand,
considering all possible MIS scenarios for the
characterization of multiple input logic gates is
computationally intensive. To improve the efficiency,
this work proposes a finite-point-based
characterization methodology for multiple input gates
with the effects of MIS. Furthermore, delay variation
due to MIS is integrated into the STA flow through
propagation of switching windows. The proposed modeling
methodology is validated using benchmark circuits at
the 45nm technology node for various operating
conditions. Experimental results demonstrate
significant reduction in computation cost and data
volume with less than $\approx$10\% error compared to
that of traditional SPICE simulation.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jung:2015:LMS,
author = "Dongha Jung and Hokyoon Lee and Seon Wook Kim",
title = "Lowering Minimum Supply Voltage for Power-Efficient
Cache Design by Exploiting Data Redundancy",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "11:1--11:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2795229",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Voltage scaling is known to be an efficient way of
saving power and energy within a system, and large
caches such as LLCs are good candidates for voltage
scaling considering their constantly increasing size.
However, the V$_{CCMIN}$ problem, in which the lower
bound of scalable voltage is limited by process
variation, has made it difficult to exploit the
benefits of voltage scaling. Lowering V$_{CCMIN}$
incurs multibit faults, which cannot be efficiently
resolved by current technologies due to their high
complexity and power consumption. We overcame the
limitation by exploiting the data redundancy of memory
hierarchy. For example, cache coherence states and
several layers of cache organization naturally expose
the existence of redundancy within cache blocks. If
blocks have redundant copies, their V$_{CCMIN}$ can be
lowered; although more faults can occur in the blocks,
they can be efficiently detected by simple error
detection codes and recovered by reloading the
redundant copies. Our scheme requires only minor
modifications to the existing cache design. We verified
our proposal on a cycle accurate simulator with
SPLASH-2 and PARSEC benchmark suites and found that the
V$_{CCMIN}$ of a 2MB L2 cache can be further lowered by
0.1V in 32nm technology with negligible degradation in
performance. As a result, we could achieve 15.6\% of
reduction in dynamic power and 15.4\% of reduction in
static power compared to the previous minimum power.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Qin:2015:CSE,
author = "Ying Qin and Shengyu Shen and Qingbo Wu and Huadong
Dai and Yan Jia",
title = "Complementary Synthesis for Encoder with Flow Control
Mechanism",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "12:1--12:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2794079",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Complementary synthesis automatically generates an
encoder's decoder with the assumption that the
encoder's all input variables can always be uniquely
determined by its output symbol sequence. However, to
prevent the faster encoder from overwhelming the slower
decoder, many encoders employ flow control mechanism
that fails this assumption. Such encoders, when their
output symbol sequences are too fast to be processed by
the decoders, will stop transmitting data symbols, but
instead transmitting idle symbols that can only
uniquely determine a subset of the encoder's input
variables. And the decoder should recognize and discard
these idle symbols. This mechanism fails the assumption
of all complementary synthesis algorithms, because some
input variables can't be uniquely determined by the
idle symbol. A novel algorithm is proposed to handle
such encoders. First, it identifies all input variables
that can be uniquely determined, and takes them as flow
control variables. Second, it infers a predicate over
these flow control variables that enables all other
input variables to be uniquely determined. Third, it
characterizes the decoder's Boolean function with Craig
interpolant. Experimental results on several complex
encoders indicate that this algorithm can always
correctly identify the flow control variables, infer
the predicates and generate the decoder's Boolean
functions.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2015:ETC,
author = "Irith Pomeranz",
title = "Enhanced Test Compaction for Multicycle Broadside
Tests by Using State Complementation",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "13:1--13:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2778953",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Multicycle tests support test compaction by allowing
each test to detect more target faults. The ability of
multicycle broadside tests to provide test compaction
depends on the ability of primary input sequences to
take the circuit between pairs of states that are
useful for detecting target faults. This ability can be
enhanced by adding design-for-testability (DFT) logic
that allows states to be complemented. This article
describes a test compaction procedure that uses such
DFT logic to form a compact multicycle broadside test
set for transition faults where the tests use constant
primary input vectors. The use of complemented states
also allows the procedure to increase the transition
fault coverage beyond the transition fault coverage of
a broadside test set. The procedure has the option of
increasing the switching activity of the tests
gradually in order to explore the tradeoff between the
number of tests, the fault coverage, and the switching
activity.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Potluri:2015:DAT,
author = "Seetal Potluri and A. Satya Trinadh and Sobhan Babu
Ch. and V. Kamakoti and Nitin Chandrachoodan",
title = "{DFT} Assisted Techniques for Peak Launch-to-Capture
Power Reduction during Launch-On-Shift At-Speed
Testing",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "14:1--14:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2790297",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Scan-based testing is crucial to ensuring correct
functioning of chips. In this scheme, the scan and
capture phases are interleaved. It is well known that
for large designs, excessive switching activity during
the launch-to-capture window leads to high voltage
droop on the power grid, ultimately resulting in false
delay failures during at-speed test. This article
proposes a new design-for-testability (DFT) scheme for
launch-on-shift (LOS) testing, which ensures that the
combinational logic remains undisturbed between the
interleaved capture phases, providing
computer-aided-design (CAD) tools with extra search
space for minimizing launch-to-capture switching
activity through test pattern ordering (TPO). We
further propose a new TPO algorithm that keeps track of
the don't cares during the ordering process, so that
the don't care filling step after the ordering process
yields a better reduction in launch-to-capture
switching activity compared to any other technique in
the literature. The proposed DFT-assisted technique,
when applied to circuits in ITC99 benchmark suite,
produces an average reduction of 17.68\% in peak
launch-to-capture switching activity (CSA) compared to
the best known low-power TPO technique. Even for
circuits whose test cubes are not rich in don't care
bits, the proposed technique produces an average
reduction of 15\% in peak CSA, while for the circuits
with test cubes rich in don't care bits ($\geq$ 75\%),
the average reduction is 24\%. The proposed technique
also reduces the average power dissipation (considering
both scan cells and combinational logic) during the
scan phase by about 43.5\% on an average, compared to
the adjacent filling technique.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2015:PDU,
author = "Chien-Chih Huang and Chin-Long Wey and Jwu-E Chen and
Pei-Wen Luo",
title = "Performance-Driven Unit-Capacitor Placement of
Successive-Approximation-Register {ADCs}",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "15:1--15:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2770872",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The performance of many switched-capacitor analog
integrated circuits, such as analog-to-digital
converters (ADCs) and sample and hold circuits, is
directly related to their accurate capacitance ratios.
In general, capacitor mismatch can result from two
sources of errors: random mismatch and systematic
mismatch. Paralleling unit capacitance (UC) with a
common-centroid structure can alleviate the random
mismatch errors. The complexity of generating an
optimal solution to the UC placement problem is
extremely high, let alone if both placement and routing
problems are to be optimized simultaneously. This
article evaluates the performance of the UC placement
generated in an existing work and proposes an
alternative UC placement to achieve optimal ratio
mismatch M and better linearity performance of SAR ADC
design. Results show that the proposed UC placement
achieves a ratio mismatch of M = 0.695, the effective
number of bits ENOB = 8.314 bits, and the integral
nonlinearity INL = 0.816 LSB (least significant bits)
for a 9-bit SAR ADC design.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sun:2015:NUB,
author = "Jin Sun and Claudio Talarico and Priyank Gupta and
Janet Roveda",
title = "A New Uncertainty Budgeting-Based Method for Robust
Analog\slash Mixed-Signal Design",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "16:1--16:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2778959",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article proposes a novel methodology for robust
analog/mixed-signal IC design by introducing a notion
of budget of uncertainty. This method employs a new
conic uncertainty model to capture process variability
and describes variability-affected circuit design as a
set-based robust optimization problem. For a
prespecified yield requirement, the proposed method
conducts uncertainty budgeting by associating
performance yield with the size of uncertainty set for
process variations. Hence the uncertainty budgeting
problem can be further translated into a tractable
robust optimization problem. Compared with the existing
robust design flow based on ellipsoid model, this
method is able to produce more reliable design
solutions by allowing varying size of conic uncertainty
set at different design points. In addition, the
proposed method addresses the limitation that the size
of the ellipsoid model is calculated solely relying on
the distribution of process parameters, while
neglecting the dependence of circuit performance upon
these design parameters. The proposed robust design
framework has been verified on various
analog/mixed-signal circuits to demonstrate its
efficiency against the ellipsoid model. Up to 24\%
reduction of design cost has been achieved by using the
uncertainty budgeting-based method.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mitra:2015:OWS,
author = "Debasis Mitra and Sarmishtha Ghoshal and Hafizur
Rahaman and Krishnendu Chakrabarty and Bhargab B.
Bhattacharya",
title = "Offline Washing Schemes for Residue Removal in Digital
Microfluidic Biochips",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "17:1--17:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2798726",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A digital microfluidic biochip (DMB) is often deployed
for multiplexing several assays in space and in time.
The residue left by one assay may contaminate the
droplets used for subsequent assays. Biochemical assays
involving cell culture and those based on particle
microfluidics also require sweeping of residual media
from an active droplet on-chip. Thus, fluidic
operations such as washing or residue removal need to
be performed routinely either to clean contamination
from the droplet pathways or to rinse off certain
droplets on the chip. In this work, several graph-based
techniques are presented for offline washing of
biochips that may have either a regular geometry (e.g.,
a 2D array of electrodes), or an irregular geometry
(e.g., an application-specific layout). The schemes can
be used for total washing, that is, for cleaning the
entire biochip or for selective washing of sites or
pathways located sparsely on the chip. The problem of
reducing the path length and washing time of the
droplets is investigated with or without capacity
constraints. The proposed algorithms for offline
washing make use of several techniques such as graph
traversal, integer linear programming (ILP) modeling,
and customized heuristics based on the nature of the
geometric distribution of the contamination profile.
The contaminated pathways are assumed to be Manhattan
or curved, and hence the techniques are applicable to
the conventional field-actuated DMBs as well as to the
emerging classes of light-actuated and active-matrix
DMBs. These techniques will be useful in enhancing the
reliability of a wide class of emerging digital
microfluidic healthcare devices.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2015:SAD,
author = "Chung-Wei Lin and Bowen Zheng and Qi Zhu and Alberto
Sangiovanni-Vincentelli",
title = "Security-Aware Design Methodology and Optimization for
Automotive Systems",
journal = j-TODAES,
volume = "21",
number = "1",
pages = "18:1--18:??",
month = nov,
year = "2015",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2803174",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Dec 3 10:15:10 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we address both security and safety
requirements and solve security-aware design problems
for the controller area network (CAN) protocol and time
division multiple access (TDMA)-based protocols. To
provide insights and guidelines for other similar
security problems with limited resources and strict
timing constraints, we propose a general security-aware
design methodology to address security with other
design constraints in a holistic framework and optimize
design objectives. The security-aware design
methodology is further applied to solve a
security-aware design problem for vehicle-to-vehicle
(V2V) communications with dedicated short-range
communication (DSRC) technology. Experimental results
demonstrate the effectiveness of our approaches in
system design without violating design constraints and
indicate that it is necessary to consider security
together with other metrics during design stages.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhang:2016:CFS,
author = "Daming Zhang and Shuangchen Li and Yongpan Liu and
Xiaobo Sharon Hu and Xinyu He and Yining Zhang and Pei
Zhang and Huazhong Yang",
title = "A {C2RTL} Framework Supporting Partition,
Parallelization, and {FIFO} Sizing for Streaming
Applications",
journal = j-TODAES,
volume = "21",
number = "2",
pages = "19:1--19:??",
month = jan,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2797135",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Feb 6 07:43:40 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Developing circuits for streaming applications written
in C (or its variants) can benefit greatly from
C-to-RTL (C2RTL) synthesis. Yet, most existing C2RTL
tools lack system-level options to trade off various
design constraints, such as delay and area. This
article introduces a systematic way to accomplish C2RTL
synthesis for streaming applications containing
thousands of lines of C (or its variants) codes.
Synthesizing circuits for such large applications
presents serious challenges for existing C2RTL tools.
Specifically, the proposed approach determines
simultaneously the number of pipeline stages and the
number of times that each functional block is
duplicated in each pipeline stage. A mixed integer
linear programming-based solution is formulated for
obtaining the optimal solution. Furthermore, a
heuristic algorithm is developed for large-scale
problems. To accommodate the differences of the data
rates between the adjacent hardware modules,
first-in-first-out (FIFO) buffers are indispensable,
but their overheads are nonnegligible. A
parallelism-aware FIFO sizing method is also introduced
to determine the optimal sizes of FIFOs. Experimental
results on seven real-world applications demonstrate
that the algorithms in the synthesis flow can make
effective design trade-offs and find superior solutions
in a short time compared with existing approaches.
Furthermore, the algorithms achieve optimal results in
most cases with subsecond running time.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pierre:2016:AVT,
author = "Laurence Pierre",
title = "Auxiliary Variables in Temporal Specifications:
Semantic and Practical Analysis for System-Level
Requirements",
journal = j-TODAES,
volume = "21",
number = "2",
pages = "20:1--20:??",
month = jan,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2811260",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Feb 6 07:43:40 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Assertion-based verification (ABV) for IP blocks given
as synchronous RTL (register transfer level)
descriptions has now widely gained acceptance. The
challenge addressed here is ABV for systems on chip
(SoC) modeled at the system level in SystemC TLM
(Transactional Level Modeling). Requirements to be
verified at this level of abstraction usually express
temporal constraints on the interactions and
communications in the SoC. We use the IEEE standard
language PSL to formalize these temporal assertions
which represent properties on communication actions and
their parameters. Auxiliary variables are often
indispensable for this formalization, but their use may
induce semantic issues. This article discusses this
matter, analyzes various existing approaches and
proposes a summary of their advantages and
shortcomings. They are also compared to our syntactic
and semantic framework, implemented in a verification
tool. The proposed operational semantics has the
advantages of being simple and intuitive while
supporting both global and local auxiliary variables.
Experimental results on industrial case studies
illustrate its applicability.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yan:2016:PDA,
author = "Jin-Tai Yan",
title = "Performance-Driven Assignment of Buffered {I/O}
Signals in Area-{I/O} Flip-Chip Designs",
journal = j-TODAES,
volume = "21",
number = "2",
pages = "21:1--21:??",
month = jan,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2818642",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Feb 6 07:43:40 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Due to the inappropriate assignment of bump pads or
the improper assignment of I/O buffers, the constructed
buffered I/O signals in an area-I/O flip-chip design
may yield longer maximum delay. In this article, the
problem of assigning performance-driven buffered I/O
signals in an area-I/O flip-chip design is first
formulated. Furthermore, the assignment of the buffered
I/O signals can be divided into two sequential phases:
Construction of performance-driven I/O signals and
Assignment of timing-constrained I/O buffers. Finally,
an efficient matching-based approach is proposed to
construct the performance-driven I/O signals for the
given I/O pins and assign the timing-constrained I/O
buffers into the constructed I/O signals in the
assignment of the buffered I/O signals in an area-I/O
flip-chip design. Compared with the experimental
results of seven tested circuits in the Elmore delay
model, the experimental results show that the
matching-based assignment in our proposed approach can
reduce 3.56\% of the total path delay, 9.72\% of the
maximum input delay, 5.90\% of the input skew, 5.64\%
of the maximum output delay, and 6.25\% of the output
skew on average by reassigning the I/O buffers. Our
proposed approach can further reduce 38.89\% of the
total path delay, 44.00\% of the maximum input delay,
49.13\% of the input skew, 44.93\% of the maximum
output delay, and 50.82\% of output skew on average by
reconstructing the I/O signals and reassigning the I/O
buffers into the I/O signals. Compared with the
experimental results of seven tested circuits in Peng's
[Peng et al. 2006] publication, the experimental
results show that our proposed matching-based approach
can further reduce 71.06\% of the total path delay,
67.83\% of the maximum input delay, 59.84\% of the
input skew, 68.87\% of the maximum output delay, and
61.46\% of the output skew on average. On the other
hand, compared with the experimental results of five
tested circuits in Lai's [Lai and Chen 2008]
publication, the experimental results show that our
proposed approach can further reduce 75.36\% of the
total path delay, 48.94\% of the input skew, and
52.80\% of the output skew on the average.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@article{Kritikakou:2016:ASC,
  author          = {Kritikakou, Angeliki and Catthoor, Francky and
                     Kelefouras, Vasilios and Goutis, Costas},
  title           = {Array Size Computation under Uniform Overlapping and
                     Irregular Accesses},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {22:1--22:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2818643},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {The size required to store an array is crucial for an
                     embedded system, as it affects the memory size, the
                     energy per memory access, and the overall system cost.
                     Existing techniques for finding the minimum number of
                     resources required to store an array are less efficient
                     for codes with large loops and not regularly occurring
                     memory accesses. They have to approximate the accessed
                     parts of the array leading to overestimation of the
                     required resources. Otherwise, their exploration time
                     is increased with an increase over the number of the
                     different accessed parts of the array. We propose a
                     methodology to compute the minimum resources required
                     for storing an array which keeps the exploration time
                     low and provides a near-optimal result for regularly
                     and non-regularly occurring memory accesses and
                     overlapping writes and reads.},
  acknowledgement = ack-nhfb,
  articleno       = {22},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Kim:2016:IWP,
  author          = {Kim, Youngsik and Yoo, Sungjoo and Lee, Sunggu},
  title           = {Improving Write Performance by Controlling Target
                     Resistance Distributions in {MLC PRAM}},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {23:1--23:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2820610},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Multi-level cell (MLC) phase change RAM (PRAM) is
                     expected to offer lower cost main memory than DRAM.
                     However, poor write performance is one of the most
                     critical problems for practical applications of MLC
                     PRAM. In this article, we present two schemes to
                     improve write performance by controlling the target
                     resistance distribution of MLC PRAM cells. First, we
                     propose multiple RESET/SET operations that relax the
                     target resistance bands of intermediate logic levels
                     with additional RESET/SET operations, which reduces the
                     program time of intermediate logic levels, thereby
                     improving write performance. Second, we propose a
                     two-step write scheme consisting of lightweight write
                     and idle-time completion write that exploits the fact
                     that hot dirty data tend to be overwritten in a short
                     time period and the MLC PRAM often has long idle times.
                     Experimental results show that the multiple RESET/SET
                     and two-step write schemes result in an average IPC
                     improvement of 15.7\% and 10.4\%, respectively, on a
                     hybrid DRAM/PRAM main memory subsystem. Furthermore,
                     their integrated solution results in an average IPC
                     improvement of 23.2\% (up to 46.4\%).},
  acknowledgement = ack-nhfb,
  articleno       = {23},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Xiang:2016:NUB,
  author          = {Xiang, Dong and Shen, Kele},
  title           = {A New Unicast-Based Multicast Scheme for
                     Network-on-Chip Router and Interconnect Testing},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {24:1--24:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2821506},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {3D technology for networks-on-chip (NOCs) becomes
                     attractive. It is important to present an effective
                     scheme for 3D stacked NOC router and interconnect
                     testing. A new approach to testing of NOC routers is
                     proposed by classifying the routers. Routers with the
                     same number of input/output ports fall into the same
                     class. Routers of the same class are identical if their
                     tests are the same. A test packet is delivered to all
                     the identical routers by a simple unicast-based
                     multicast scheme. It is found that the depth of the
                     consumption buffer at each router has great impact on
                     the test delivery time because test application and
                     test delivery for router testing cannot be handled
                     concurrently. Test delivery must set a router to
                     operational mode. A mathematical model is presented to
                     evaluate the impact of consumption buffer depth on the
                     test delivery time. A new and simple test application
                     scheme is proposed for interconnect testing. Some
                     interesting extensions are presented for further test
                     time reduction and thermal considerations. Sufficient
                     experimental results are presented by comparison with
                     one previous method. The proposed method works for
                     single stuck-at, transition, even small delay faults at
                     routers, and single bridging faults at physical,
                     consumption and injection channels.},
  acknowledgement = ack-nhfb,
  articleno       = {24},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Li:2016:ODM,
  author          = {Li, Zipeng and Ho, Tsung-Yi and Chakrabarty, Krishnendu},
  title           = {Optimization of {$3$D} Digital Microfluidic Biochips
                     for the Multiplexed Polymerase Chain Reaction},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {25:1--25:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2811259},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {A digital microfluidic biochip (DMFB) is an attractive
                     technology platform for revolutionizing immunoassays,
                     clinical diagnostics, drug discovery, DNA sequencing,
                     and other laboratory procedures in biochemistry. In
                     most of these applications, real-time polymerase chain
                     reaction (PCR) is an indispensable step for amplifying
                     specific DNA segments. To reduce the reaction time to
                     meet the requirement of ``real-time'' applications,
                     multiplexed PCR is widely utilized. In recent years,
                     three-dimensional (3D) DMFBs that integrate
                     photodetectors (i.e., cyberphysical DMFBs) have been
                     developed, which offer the benefits of smaller size,
                     higher sensitivity, and faster result generations.
                     However, current DMFB design methods target
                     optimization in only two dimensions, thus ignoring the
                     3D two-layer structure of a DMFB. Furthermore, these
                     techniques ignore practical constraints related to the
                     interference between on-chip device pairs, the
                     performance-critical PCR thermal loop, and the physical
                     size of devices. Moreover, some practical issues in
                     real scenarios are not stressed (e.g., the avoidance of
                     the cross-contamination for multiplexed PCR). In this
                     article, we describe an optimization solution for a 3D
                     DMFB and present a three-stage algorithm to realize a
                     compact 3D PCR chip layout, which includes: (i) PCR
                     thermal-loop optimization, (ii) 3D global placement
                     based on Strong-Push-Weak-Pull (SPWP) model, and (iii)
                     constraint-aware legalization. To avoid
                     cross-contamination between different DNA samples, we
                     also propose a Minimum-Cost-Maximum-Flow-based
                     (MCMF-based) method for reservoir assignment.
                     Simulation results for four laboratory protocols
                     demonstrate that the proposed approach is effective for
                     the design and optimization of a 3D chip for
                     multiplexed real-time PCR.},
  acknowledgement = ack-nhfb,
  articleno       = {25},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Zhang:2016:PPG,
  author          = {Zhang, Le and Sarin, Vivek},
  title           = {Parallel Power Grid Analysis Based on Enlarged
                     Partitions},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {26:1--26:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2806885},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {As the size and complexity of current VLSI circuits
                     grows, faster power grid simulation is becoming more
                     and more desirable. In this article, we present a
                     parallel iterative method for static VLSI power grid
                     simulation. In the proposed enlarged-partition-based
                     preconditioned conjugate gradient (EPPCG) power grid
                     solver, the power grid is divided into disjoint
                     partitions that are subsequently enlarged to obtain
                     accurate solution within each partition. The global
                     solution obtained by solving enlarged partition
                     problems concurrently acts as a highly effective
                     parallel preconditioner. The combination of effective
                     preconditioning and efficient parallelization helps
                     achieve very high performance. The experiments show
                     that our parallel implementation can achieve
                     significant speed improvement [61X--142X] over a
                     state-of-the-art direct solver.},
  acknowledgement = ack-nhfb,
  articleno       = {26},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Jin:2016:CEE,
author = "Song Jin and Songwei Pei and Yinhe Han and Huawei Li",
title = "A Cost-Effective Energy Optimization Framework of
Multicore {SoCs} Based on Dynamically Reconfigurable
Voltage-Frequency Islands",
journal = j-TODAES,
volume = "21",
number = "2",
pages = "27:1--27:??",
month = jan,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2817207",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Sat Feb 6 07:43:40 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Voltage-frequency island (VFI)-based design has been
widely exploited for optimizing system energy of
embedded multicore chip in recent years. The existing
work either constructed a single static VFI partition
for all kinds of applications or required per-core
voltage domain configuration. However, the former
solution is hard to find one optimal VFI partition for
diverse applications while the latter one suffers from
high hardware cost. In this article, we propose a cost
effective energy optimization framework based on
dynamically reconfigurable VFI (D-VFI). Our framework
treats a small number of cores as dynamic cores
(D-cores) and configures each of them with an
independent voltage domain. At runtime, the D-cores can
be pieced together with neighboring static VFIs by
scaling their operating voltages. This can dynamically
construct the optimal VFI partitions for different
kinds of applications, thus achieving more aggressive
energy optimization under low cost. To identify the
D-cores, we propose a rules constrained task scheduling
and VFI partitioning algorithm. Moreover, we analyze
the task schedules to determine the optimal scaling
intervals which can accommodate voltage scaling induced
latency. Experimental results demonstrate the
effectiveness of the proposed scheme.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@article{Kamal:2016:YSI,
  author          = {Kamal, Mehdi and Afzali-Kusha, Ali and Safari, Saeed and
                     Pedram, Massoud},
  title           = {Yield and Speedup Improvements in Extensible
                     Processors by Allocating Extra Cycles to Some Custom
                     Instructions},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {28:1--28:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2830566},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {In this article, we investigate the application of
                     different techniques for mitigating the impact of
                     process variations on the custom functional unit (CFU)
                     of extensible processors. The techniques include using
                     extra cycles for the CFU and extending the clock period
                     for the extensible processor. The former technique is
                     based on providing an extra clock cycle to those custom
                     instructions (CIs) that have timing yields smaller than
                     one. For this purpose, we make use of a lookup table
                     (LUT) for each fabricated processor. Based on a
                     post-fabrication analysis, the need for an extra clock
                     cycle for some CIs is determined. Consequently, the CI
                     timing violations are prevented, and all manufactured
                     extensible processors will work with a predefined clock
                     cycle time. To study the effect of the objective
                     function (used during the CI selection phase) on the
                     efficacy of the suggested architectural technique, we
                     investigate three different objective functions. In the
                     second technique, the clock period extension is used to
                     guarantee a design yield of one. Our results
                     demonstrate that combining both techniques helps
                     increase the speedup achieved by the extensible
                     processor. To assess the efficacies of the proposed
                     methods, several benchmarks from different application
                     domains are used. Results of the study reveal that the
                     suggested techniques provide considerable improvements
                     in the speedups of the extensible processors when
                     compared to those of approaches that do not consider
                     the impact of process variations.},
  acknowledgement = ack-nhfb,
  articleno       = {28},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Chen:2016:TTS,
  author          = {Chen, Guoqing and Xu, Yi and Hu, Xing and Guo, Xiangyang
                     and Ma, Jun and Hu, Yu and Xie, Yuan},
  title           = {{TSocket}: Thermal Sustainable Power Budgeting},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {29:1--29:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2837023},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {As technology scales, thermal management for multicore
                     architectures becomes a critical challenge due to
                     increasing power density. Existing power budgeting
                     techniques focus on maximizing performance under a
                     given power budget by optimizing the core
                     configurations. In multicore era, a chip-wide power
                     budget, however, is not sufficient to ensure thermal
                     constraints because the thermal sustainable power
                     capacity varies with different threading strategies and
                     core configurations. In this article, we propose two
                     models to dynamically estimate the thermal sustainable
                     power capacity in homogeneous multicore systems:
                     uniform power model and nonuniform power model. These
                     two models convert the thermal effect of threading
                     strategies and core configurations into power capacity,
                     which provide a context-based core power capacity for
                     power budgeting. Based on these models, we introduce a
                     power budgeting framework aiming to improve the
                     performance within thermal constraints, named as
                     TSocket. Compared to the chip-wide power budgeting
                     solution, TSocket shows 19\% average performance
                     improvement for the PARSEC benchmarks in single program
                     scenario and up to 11\% performance improvement in
                     multiprogram scenario. The performance improvement is
                     achieved by reducing thermal violations and exploring
                     thermal headrooms.},
  acknowledgement = ack-nhfb,
  articleno       = {29},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Chen:2016:RAR,
  author          = {Chen, Liang and Ebrahimi, Mojtaba and Tahoori, Mehdi B.},
  title           = {Reliability-Aware Resource Allocation and Binding in
                     High-Level Synthesis},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {30:1--30:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2839300},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Soft error is nowadays a major reliability issue for
                     nanoscale VLSI, and addressing it during high-level
                     synthesis is essential to improve the efficiency of
                     error mitigation. Motivated by the observation that for
                     behavioral designs, especially control-flow intensive
                     ones, variables and operations have non-uniform soft
                     error vulnerabilities, we propose a novel
                     reliability-aware allocation and binding technique to
                     explore more effective soft error mitigation during
                     high level synthesis. We first perform a comprehensive
                     vulnerability analysis at the behavioral level by
                     considering error propagation and masking in both
                     control and data flows. Then the optimizations based on
                     integer linear programming, as well as heuristic
                     algorithm, are employed to incorporate the behavioral
                     vulnerabilities into the register and functional unit
                     binding phases to achieve cost-efficient error
                     mitigation. The experimental results reveal that
                     compared with the previous techniques which ignored
                     behavioral vulnerabilities, the proposed approach can
                     achieve up to 85\% reliability improvement with the
                     same amount of area budget in the RTL design.},
  acknowledgement = ack-nhfb,
  articleno       = {30},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Dubeuf:2016:EPA,
  author          = {Dubeuf, Jeremy and Hely, David and Beroulle, Vincent},
  title           = {{ECDSA} Passive Attacks, Leakage Sources, and Common
                     Design Mistakes},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {31:1--31:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2820611},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Elliptic Curves Cryptography (ECC) tends to replace
                     RSA for public key cryptographic services. ECC is
                     involved in many secure schemes such as Elliptic Curve
                     Diffie--Hellman (ECDH) key agreement, Elliptic Curve
                     Integrated Encryption Scheme (ECIES), and Elliptic
                     Curve Digital Signature Algorithm (ECDSA). As for every
                     cryptosystem, implementation of such schemes may
                     jeopardize the inherent security provided by the
                     mathematical properties of the ECC. Unfortunate
                     implementation or algorithm choices may create serious
                     vulnerabilities. The elliptic curve scalar operation is
                     particularly sensitive among these schemes. This
                     article surveys passive attacks against well-spread
                     elliptic curve scalar multiplication algorithms
                     highlighting leakage sources and common mistakes that
                     can be used to attack the ECDSA scheme. Experimental
                     results are provided to illustrate and demonstrate the
                     effectiveness of each vulnerability. Finally, the
                     article describes the link between partial leakage and
                     lattice attack in order to understand and demonstrate
                     the impact of small leakages on the security of ECDSA.
                     An example of side channel and lattice attack
                     combination on NIST P-256 is provided in the case where
                     the elliptic curve scalar multiplication is not
                     protected against DPA/CPA and a controllable device is
                     not accessible.},
  acknowledgement = ack-nhfb,
  articleno       = {31},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Lukasiewycz:2016:SAO,
  author          = {Lukasiewycz, Martin and Mundhenk, Philipp and
                     Steinhorst, Sebastian},
  title           = {Security-Aware Obfuscated Priority Assignment for
                     Automotive {CAN} Platforms},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {32:1--32:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2831232},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Security in automotive in-vehicle networks is an
                     increasing problem with the growing connectedness of
                     road vehicles. This article proposes a security-aware
                     priority assignment for automotive controller area
                     network (CAN) platforms with the aim of mitigating
                     scaling effects of attacks on vehicle fleets. CAN is
                     the dominating field bus in the automotive domain due
                     to its simplicity, low cost, and robustness. While
                     messages might be encrypted to enhance the security of
                     CAN systems, their priorities are usually identical for
                     automotive platforms, comprising generally a large
                     number of vehicle models. As a result, the identifier
                     uniquely defines which message is sent, allowing
                     attacks to scale across a fleet of vehicles with the
                     same platform. As a remedy, we propose a methodology
                     that is capable of determining obfuscated message
                     identifiers for each individual vehicle. Since
                     identifiers directly represent message priorities, the
                     approach has to take the resulting response time
                     variations into account while satisfying application
                     deadlines for each vehicle schedule separately. Our
                     approach relies on Quadratically Constrained Quadratic
                     Program (QCQP) solving in two stages, specifying first
                     a set of feasible fixed priorities and subsequently
                     bounded priorities for each message. With the obtained
                     bounds, obfuscated identifiers are determined, using a
                     very fast randomized sampling. The experimental
                     results, consisting of a large set of synthetic test
                     cases and a realistic case study, give evidence of the
                     efficiency of the proposed approach in terms of
                     scalability. The results also show that the diversity
                     of obtained identifiers is effectively optimized with
                     our approach, resulting in a very good obfuscation of
                     CAN messages in in-vehicle communication.},
  acknowledgement = ack-nhfb,
  articleno       = {32},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Suresh:2016:AVD,
  author          = {Suresh, Chandra K. H. and Sinanoglu, Ozgur and
                     Ozev, Sule},
  title           = {Adapting to Varying Distribution of Unknown Response
                     Bits},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {33:1--33:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2835489},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Traditionally, test patterns that are generated for a
                     given circuit are applied in an identical manner to all
                     manufactured devices until each device under test
                     either fails or passes each test. With increasing
                     process variations, the statistical diversity of
                     manufactured devices is increasing, making such
                     one-size-fits-all approaches increasingly inefficient.
                     Adaptive test techniques address this problem by
                     tailoring the test decisions for the statistical
                     characteristics of the device under test. In this
                     article, we present several adaptive strategies to
                     enable adaptive unknown bit masking for
                     faster-than-at-speed testing so as to ensure no yield
                     loss while attaining the maximum test quality based on
                     tester memory constraints. We also develop a
                     tester-enabled compression scheme that helps alleviate
                     memory constraints further, shifting the tradeoff space
                     favorably to improve test quality.},
  acknowledgement = ack-nhfb,
  articleno       = {33},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Tan:2016:ESE,
  author          = {Tan, Jingweijia and Li, Zhi and Chen, Mingsong and
                     Fu, Xin},
  title           = {Exploring Soft-Error Robust and Energy-Efficient
                     Register File in {GPGPUs} using Resistive Memory},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {34:1--34:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2827697},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {The increasing adoption of graphics processing units
                     (GPUs) for high-performance computing raises the
                     reliability challenge, which is generally ignored in
                     traditional GPUs. GPUs usually support thousands of
                     parallel threads and require a sizable register file.
                     Such large register file is highly susceptible to soft
                     errors and power-hungry. Although ECC has been adopted
                     to register file in modern GPUs, it causes considerable
                     power overhead, which further increases the power
                     stress. Thus, an energy-efficient soft-error protection
                     mechanism is more desirable. Besides its extremely low
                     leakage power consumption, resistive memory (e.g.,
                     spin-transfer torque RAM) is also immune to the
                     radiation induced soft errors due to its magnetic field
                     based storage. In this article, we propose to LEverage
                     reSistive memory to enhance the Soft-error robustness
                     and reduce the power consumption (LESS) of registers in
                     the General-Purpose computing on GPUs (GPGPUs). Since
                     resistive memory experiences longer write latency
                     compared to SRAM, we explore the unique characteristics
                     of GPGPU applications to obtain the win-win gains:
                     achieving the near-full soft-error protection for the
                     register file, and meanwhile substantially reducing the
                     energy consumption with negligible performance
                     degradation. Our experimental results show that LESS is
                     able to mitigate the registers soft-error vulnerability
                     by 86\% and achieve 61\% energy savings with negligible
                     (e.g., 1\%) performance degradation.},
  acknowledgement = ack-nhfb,
  articleno       = {34},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Pomeranz:2016:DTF,
  author          = {Pomeranz, Irith},
  title           = {Design-for-Testability for Functional Broadside Tests
                     under Primary Input Constraints},
  journal         = j-TODAES,
  volume          = {21},
  number          = {2},
  pages           = {35:1--35:??},
  month           = jan,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2831231},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Sat Feb 6 07:43:40 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {Functional broadside tests avoid overtesting of delay
                     faults by creating functional operation conditions
                     during the clock cycles where delay faults are
                     detected. When a circuit is embedded in a larger
                     design, a functional broadside test needs to take into
                     consideration the functional constraints that the
                     design creates for its primary input vectors. At the
                     same time, application of primary input vectors as part
                     of a scan-based test requires hardware support. An
                     earlier work considered the case where a primary input
                     vector is held constant during a test. The approach
                     described in this article matches the hardware for
                     applying primary input vectors to the functional
                     constraints that the design creates. This increases the
                     transition fault coverage that can be achieved by
                     functional broadside tests. This article also considers
                     the effect on the transition fault coverage achievable
                     using close-to-functional broadside tests.},
  acknowledgement = ack-nhfb,
  articleno       = {35},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Young:2016:PSS,
  author          = {Young, Evangeline and Davoodi, Azadeh},
  title           = {Preface to Special Section on New Physical Design
                     Techniques for the Next Generation of Integration
                     Technology},
  journal         = j-TODAES,
  volume          = {21},
  number          = {3},
  pages           = {36:1--36:??},
  month           = jul,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2902365},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  articleno       = {36},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@article{Darav:2016:EPH,
  author          = {Darav, Nima Karimpour and Kennings, Andrew and
                     Tabrizi, Aysa Fakheri and Westwick, David and
                     Behjat, Laleh},
  title           = {{Eh?Placer}: a High-Performance Modern
                     Technology-Driven Placer},
  journal         = j-TODAES,
  volume          = {21},
  number          = {3},
  pages           = {37:1--37:??},
  month           = jul,
  year            = {2016},
  coden           = {ATASFO},
  doi             = {https://doi.org/10.1145/2899381},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  bibdate         = {Fri Sep 23 15:16:20 MDT 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/todaes/;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract        = {The placement problem has become more complex and
                     challenging due to a wide variety of complicated
                     constraints imposed by modern process technologies.
                     Some of the most challenging constraints and objectives
                     were highlighted during the most recent ACM/IEEE
                     International Symposium on Physical Design (ISPD)
                     contests. In this article, the framework of Eh?Placer
                     and its developed algorithms are elaborated, with the
                     main focus on modern technology constraints and
                     runtime. The technology constraints considered as part
                     of Eh?Placer are fence region, target density, and
                     detailed routability constraints. We present a complete
                     description on how these constraints are considered in
                     different stages of Eh?Placer. The results obtained
                     from the contests indicate that Eh?Placer is able to
                     efficiently handle modern technology constraints and
                     ranks highly among top academic placement tools.},
  acknowledgement = ack-nhfb,
  articleno       = {37},
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems (TODAES)},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
@Article{Livramento:2016:CTA,
author = "Vinicius Livramento and Renan Netto and Chrystian Guth
and Jos{\'e} Lu{\'\i}s G{\"u}ntzel and Luiz C. V. {Dos
Santos}",
title = "Clock-Tree-Aware Incremental Timing-Driven Placement",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "38:1--38:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2858793",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The increasing impact of interconnections on overall
circuit performance makes timing-driven placement (TDP)
a crucial step toward timing closure. Current TDP
techniques improve critical paths but overlook the
impact of register placement on clock tree quality. On
the other hand, register placement techniques found in
the literature mainly focus on power consumption,
disregarding timing and routability. Indeed, postponing
register placement may undermine the optimization
achieved by TDP, since the wiring between sequential
and combinational elements would be touched. This work
proposes a new approach for an effective coupling
between register placement and TDP that relies on two
key aspects to handle sequential and combinational
elements separately: only the registers in the critical
paths are touched by TDP (in practice they represent a
small percentage of the total number of registers), and
the shortening of clock tree wirelength can be obtained
with limited variation in signal wirelength and
placement density. The approach consists of two steps:
(1) incremental register placement guided by a virtual
clock tree to reduce clock wiring capacitance while
preserving signal wirelength and density, and (2)
incremental TDP to minimize the total negative slack.
For the first step, we propose a novel technique that
combines clock-net contraction and register clustering
forces to reduce the clock wirelength. For the second
step, we propose a novel Lagrangian Relaxation
formulation that minimizes total negative slack for
both setup and hold timing violations. To solve the
formulation, we propose a TDP technique using a novel
discrete search that employs a Euclidean distance to
define a proper neighborhood. For the experimental
evaluation of the proposed approach, we relied on the
ICCAD 2014 TDP contest infrastructure and compared our
results with the best results obtained from that
contest in terms of timing closure, clock tree
compactness, signal wirelength, and density. Assuming a
long displacement constraint, our technique achieves
worst and total negative slack reductions of around
24\% and 26\%, respectively. In addition, our approach
leads to 44\% shorter clock tree wirelength with
negligible impact on signal wirelength and placement
density. In the face of such results, the proposed
coupling seems a useful approach to handle the
challenges faced by contemporary physical synthesis.",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2016:PAC,
author = "Po-Hsun Wu and Mark Po-Hung Lin and Xin Li and
Tsung-Yi Ho",
title = "Parasitic-Aware Common-Centroid {FinFET} Placement and
Routing for Current-Ratio Matching",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "39:1--39:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2856031",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The FinFET technology is regarded as a better
alternative for modern high-performance and low-power
integrated-circuit design due to more effective channel
control and lower power consumption. However, the
gate-misalignment problem resulting from process
variation and the parasitic resistance resulting from
interconnecting wires based on the FinFET technology
becomes even more severe compared with the conventional
planar CMOS technology. Such gate misalignment and
unwanted parasitic resistance may increase the
threshold voltage and decrease the drain current of
transistors. When applying the FinFET technology to
analog circuit design, the variation of drain currents
can destroy current-ratio matching among transistors
and degrade circuit performance. In this article, we
present the first FinFET placement and routing
algorithms for layout generation of a common-centroid
FinFET array to precisely match the current ratios
among transistors. Experimental results show that the
proposed matching-driven FinFET placement and routing
algorithms can obtain the best current-ratio matching
compared with the state-of-the-art common-centroid
placer.",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2016:FTS,
author = "Jinglei Huang and Song Chen and Wei Zhong and Wenchao
Zhang and Shengxi Diao and Fujiang Lin",
title = "Floorplanning and Topology Synthesis for
Application-Specific Network-on-Chips with
{RF}-Interconnect",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "40:1--40:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2890499",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Application-specific Network-on-Chip (ASNoC) has been
proposed as a promising solution to address the global
communication challenges in System-on-Chips. However,
with the number of cores increasing, the on-chip
communication becomes more and more complex and the
power consumption imposes the major challenge for
designing ASNoCs. In this article, we propose a
four-stage floorplanning and topology synthesis
approach for ASNoCs with Radio-Frequency Interconnect
(RF-I). First, considering the advantage of RF-I in
long-distance on-chip communication, we integrate the
floorplanning and clustering to explore the proper
clustering of cores, where the cores belonging to the
same cluster will share the same switch for
communications, form an island, and occupy a contiguous
physical region. After the switches and network
interfaces are inserted into the floorplan, the
allocation of routing paths and the RF-I logical
channels are integrated in an iterative procedure to
generate fine-grained dynamically reconfigurable ASNoC
topologies. Finally, considering the signal integrity
of RF-I, we adjust the placement of the switches by a
simulated annealing-based method to reduce the number
of RF-I routing corners. To evaluate the placement of
switches, we propose a dynamic programming-based
method to route the transmission line with the
minimized number of routing corners in linear time. The
results show that, using the RF-I, we can reduce the
power consumption of ASNoCs by 20\% to 26\%.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xu:2016:ACS,
author = "Chang Xu and Guojie Luo and Peixin Li and Yiyu Shi and
Iris Hui-Ru Jiang",
title = "Analytical Clustering Score with Application to
Postplacement Register Clustering",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "41:1--41:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2894753",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Circuit clustering is usually done through discrete
optimizations to enable circuit size reduction or
design-specific cluster formation. In this article, we
are interested in the register-clustering technique for
clock-power reduction by leveraging new opportunities
introduced by multibit flip-flop (MBFF). Currently,
INTEGRA is the only existing postplacement MBFF
clustering optimizer with a subquadratic time
complexity. However, it severely degrades the
wirelength, especially for realistic designs, which may
nullify the benefits of MBFF clustering. In contrast,
we formulate an analytical clustering score with a
nonlinear programming framework, in which the
wirelength objective can be seamlessly integrated and
the solver has empirical subquadratic time complexity.
With the MBFF library, the application of our
analytical clustering method achieves comparable clock
power to the state-of-the-art techniques, but further
reduces the wirelength by about 25\%. Even without the
MBFF library, we can still achieve 30\% clock
wirelength reduction. In addition, the proposed method
can potentially be integrated into an in-placement MBFF
clustering solver and be applied to other problems that
require formulating clustering scores in their
objective functions.",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xu:2016:PPA,
author = "Xiaoqing Xu and Bei Yu and Jhih-Rong Gao and Che-Lun
Hsu and David Z. Pan",
title = "{PARR}: Pin-Access Planning and Regular Routing for
Self-Aligned Double Patterning",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "42:1--42:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2842612",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Pin access has become one of the most difficult
challenges for detailed routing in advanced technology
nodes, for example, in 14nm and below, for which
double-patterning lithography has to be used for
manufacturing lower metal routing layers with tight
pitches, such as M2 and M3. Self-aligned double
patterning (SADP) provides better control on line edge
roughness and overlay, but it has very restrictive
design constraints and prefers regular layout patterns.
This article presents a comprehensive pin-access
planning and regular routing framework (PARR) for SADP
friendliness. Our key techniques include precomputation
of both intracell and intercell pin accessibility, as
well as local and global pin-access planning to enable
handshaking between standard cell-level pin access and
detailed routing under SADP constraints. A pin
access-driven rip-up and reroute scheme is proposed to
improve the ultimate routability. Our experimental
results demonstrate that PARR can achieve much better
routability and overlay control compared with previous
approaches.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yu:2016:EOA,
author = "Bei Yu and Kun Yuan and Jhih-Rong Gao and Shiyan Hu
and David Z. Pan",
title = "{EBL} Overlapping Aware Stencil Planning for {MCC}
System",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "43:1--43:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2888394",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Electron beam lithography (EBL) is a promising,
maskless solution for the technology beyond 14nm logic
nodes. To overcome its throughput limitation, industry
has proposed character projection (CP) technique, where
some complex shapes (characters) can be printed in one
shot. Recently, the traditional EBL system was extended
into a multi-column cell (MCC) system to further
improve the throughput. In an MCC system, several
independent CPs are used to further speed up the
writing process. Because of the area constraint of
stencil, the MCC system needs to be packed/planned
carefully to take advantage of the characters. In this
article, we prove that the overlapping aware stencil
planning (OSP) problem is NP-hard. Then we propose
E-BLOW, a tool to solve the MCC system OSP problem.
E-BLOW involves several novel speedup techniques, such
as successive relaxation and dynamic programming.
Experimental results show that, compared with previous
works, E-BLOW demonstrates better performance for both
the conventional EBL system and the MCC system.",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2016:NAP,
author = "Seungwon Kim and Seokhyeong Kang and Ki Jin Han and
Youngmin Kim",
title = "Novel Adaptive Power-Gating Strategy and Tapered {TSV}
Structure in Multilayer {$3$D} {IC}",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "44:1--44:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2894752",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Among power dissipation components, leakage power has
become more dominant with each successive technology
node. Power-gating techniques have been widely used to
reduce the standby leakage energy. In this work, we
investigate a power-gating strategy for through-silicon
via (TSV)-based 3D IC stacking structures. Power-gating
control is becoming more complicated as more dies are
stacked. We combine the on-chip PDN and TSV in a
multilayered 3D IC to perform power-gating analysis of
the static and dynamic voltage drops and in-rush
current. Then, we propose a novel power-gating strategy
that optimizes the in-rush current profile, subject to
the voltage-drop constraints. Our power-gating strategy
provides a minimal wake-up latency such that the
voltage noise safety margins are not violated. In
addition, the layer dependency of the 3D IC on the
power gating is analyzed in terms of the wake-up time
reduction. We achieve an average wake-up time reduction
of 43\% for all cases with our adaptive power-gating
method that exploits location (or layer) information
regarding the aggressors in a 3D IC. A tapered TSV
architecture based on the layer dependency has been
analyzed; it exhibits up to 18\% wake-up time reduction
compared to that of circuits with uniform TSVs.",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2016:DCV,
author = "Gong Chen and Toru Fujimura and Qing Dong and
Shigetoshi Nakatake and Bo Yang",
title = "{DC} Characteristics and Variability on 90nm {CMOS}
Transistor Array-Style Analog Layout",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "45:1--45:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2888395",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In the MOS analog layout, variability suppression is
becoming a major issue, as is layout efficiency.
Introducing a transistor array (TA) style to analog
layout, this article addresses the layout-dependent
variability based on the measurement results of test
chips on 90nm CMOS process. In TA style, a large
transistor is decomposed into a set of unified
subtransistors, which are connected in series or
parallel. Focusing on one row layout of diffusion
sharing for the multiple gates, we analyze the current
direction-dependent variability and the leakage current
via off-gates for the electrical isolation.
Furthermore, we present several analog design cases on
TA including analysis of the impact on the DC
characteristics caused by the transistor channel
decomposition.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2016:MSM,
author = "Chao Wang and Chuansheng Dong and Haibo Zeng and
Zonghua Gu",
title = "Minimizing Stack Memory for Hard Real-Time
Applications on Multicore Platforms with Partitioned
Fixed-Priority or {EDF} Scheduling",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "46:1--46:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2846096",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Multicore processors are increasingly adopted in
resource-constrained real-time embedded applications.
In the development of such applications, efficient use
of RAM memory is as important as the effective
scheduling of software tasks. Preemption Threshold
Scheduling (PTS) is a well-known technique for
controlling the degree of preemption, possibly
improving system schedulability, and reducing system
stack usage. In this paper, we consider partitioned
multi-processor scheduling on a multicore processor
with either Fixed-Priority or Earliest Deadline First
scheduling algorithms with PTS and address the design
optimization problem of mapping tasks to processor
cores and assignment of task priorities and preemption
thresholds with the optimization objective of
minimizing system stack usage. We present both optimal
solution techniques based on Mixed Integer Linear
Programming and efficient heuristic algorithms that can
achieve high-quality results. We perform extensive
performance evaluations using both synthetic tasksets
and industrial case studies.",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2016:DWC,
author = "Sungkwang Lee and Taemin Lee and Hyunsun Park and
Junwhan Ahn and Sungjoo Yoo and Youjip Won and Sunggu
Lee",
title = "Differential Write-Conscious Software Design on
Phase-Change Memory: an {SQLite} Case Study",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "47:1--47:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2842613",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Phase-change memory (PCM) has several benefits
including low cost, non-volatility,
byte-addressability, etc., and limitations such as
write endurance. There have been several hardware
approaches to exploit the benefits while minimizing the
negative impact of limitations. Software approaches
could give further improvements, when used together
with hardware approaches, by taking advantage of write
behavior present in the program, e.g., write behavior
on dynamically allocated data, which is hardly captured
by hardware approaches. This work proposes a software
design methodology to reduce costly PCM writes. First,
on top of an existing hardware approach such as
Flip-N-Write, we advocate exploiting the capability of
PCM bit-level differential write in the software by
judiciously reusing previously allocated memory
resource. In order to avoid wear-out incurred by the
reuse, we present software-based wear-leveling methods
that distribute writes across PCM cells. In order to
further reduce PCM writes, we propose identifying data,
the loss of which does not affect the functionality of
the underlying software, and then diverting write
traffic for those data items to volatile memory. To
evaluate the effectiveness of these methods, as a case
study, we applied the proposed methods to the design of
journaling in SQLite, which is an important database
application commonly used in smartphones. For the
experiments, we used an in-house PCM-based prototype
board. Our experiments with four representative mobile
applications show that the proposed design methods,
which are applied on top of the hardware approach,
Flip-N-Write, result in 75.2\% further reduction in
total bit updates in PCM, on average, without
aggravating wear-out compared with the baseline of
PCM-based journaling, which is based only on the
hardware approach. Also, the proposed design methods
result in 49.4\% reduction in energy consumption and
52.3\% reduction in runtime compared to a typical FIFO
management of free resources.",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2016:FOF,
author = "Xing Huang and Wenzhong Guo and Genggeng Liu and
Guolong Chen",
title = "{FH-OAOS}: a Fast Four-Step Heuristic for
Obstacle-Avoiding Octilinear {Steiner} Tree
Construction",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "48:1--48:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2856033",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the sharp increase of very large-scale integrated
(VLSI) circuit density, we are faced with many knotty
issues. Particularly in the routing phase of VLSI
physical design, the interconnection effects directly
relate to the final performance of circuits. However,
the optimization capability of traditional rectilinear
architecture is limited; thus, both academia and
industry have been devoted to nonrectilinear
architecture in recent years, especially octilinear
architecture, which is the most promising because it
can greatly improve the performance of modern chips. In
this article, we design FH-OAOS, an obstacle-avoiding
algorithm in octilinear architecture, by constructing
an obstacle-avoiding octilinear Steiner minimal
tree (OAOSMT). Our approach first constructs an
obstacle-free Euclidean minimal spanning tree (OFEMST)
on the given pins based on Delaunay triangulation (DT).
Then, two lookup tables about OFEMST's edge are
generated, which can be seen as the information center
of FH-OAOS and can provide information support for
algorithm operation. Next, an efficient
obstacle-avoiding strategy is proposed to convert the
OFEMST into an obstacle-avoiding octilinear Steiner
tree (OAOST). Finally, the generated OAOST is refined
to construct the final OAOSMT by applying three
effective strategies. Experimental results on various
benchmarks show that FH-OAOS achieves 66.39 times
speedup on average, while the average wirelength of the
final OAOSMT is only 0.36\% larger than the best
existing solution.",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mittal:2016:STC,
author = "Sparsh Mittal",
title = "A Survey of Techniques for Cache Locking",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "49:1--49:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2858792",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Cache memory, although important for boosting
application performance, is also a source of execution
time variability, and this makes its use difficult in
systems requiring worst-case execution time (WCET)
guarantees. Cache locking is a promising approach for
simplifying WCET estimation and providing
predictability, and hence, several commercial
processors provide ability for locking cache. However,
cache locking also has several disadvantages (e.g.,
extra misses for unlocked blocks, complex algorithms
required for selection of locking contents) and hence,
a careful management is required to realize the full
potential of cache locking. In this article, we present
a survey of techniques proposed for cache locking. We
categorize the techniques into several groups to
underscore their similarities and differences. We also
discuss the opportunities and obstacles in using cache
locking. We hope that this article will help
researchers gain insight into cache locking schemes and
will also stimulate further work in this area.",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Venkatasubramanian:2016:PID,
author = "Ramachandran Venkatasubramanian and Robert Elio and
Sule Ozev",
title = "Process Independent Design Methodology for the Active
{RC} and Single-Inverter-Based Rail Clamp",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "50:1--50:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2851490",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "RC and single-inverter-based rail clamps are widely
used in semiconductor products for electrostatic
discharge (ESD) protection. We propose a
technology-node-independent design methodology for
these rail clamp circuits that takes process, voltage,
and temperature variations into consideration. The
methodology can be used as a cookbook by the designer
or be used to automate the entire design process.
Tradeoffs between various design metrics such as ESD
performance (Human Body Model), leakage, and area are
considered. Simplified circuit models for the rail
clamp are presented to gain insights into its working
and to size the circuit components. A rail clamp for
core power domain is designed using the proposed
approach in 40nm low-power process and performance
results of the design are also presented. The
effectiveness of the design methodology is proven in
three different technology nodes by comparing the
obtained design with the best design from among 250,000
designs obtained by randomly sampling from the design
space.",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2016:SDM,
author = "Sangmin Kim and Seokhyeong Kang and Youngsoo Shin",
title = "Synthesis of Dual-Mode Circuits Through Library
Design, Gate Sizing, and Clock-Tree Optimization",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "51:1--51:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2856032",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A dual-mode circuit is a circuit that has two
operating modes: a default high-performance mode at
nominal voltage and a secondary low-performance
near-threshold voltage (NTV) mode. A key problem that
we address is to maximize NTV mode clock frequency.
Some cells that are particularly slow in NTV mode are
optimized through transistor sizing and stack removal;
static noise margin of each gate is extracted and
appended in a library so that function failures can be
checked and removed during synthesis. A new gate-sizing
algorithm is proposed that takes account of timing
slacks at both modes. A new sensitivity measure is
introduced for this purpose; binary search is then
applied to find the maximum NTV mode frequency.
Clock-tree synthesis is reformulated to minimize clock
skew at both modes. This is motivated by the fact that
the proportion of load-dependent delay along clock
paths, as well as clock-path delays themselves, should
be made equal. Experiments on some test circuits
indicate that NTV mode clock period is reduced by 24\%,
on average; clock skew at NTV decreases by 13\%, on
average; and NTV mode energy-delay product is reduced
by 20\%, on average.",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Qian:2016:PEN,
author = "Zhiliang Qian and Paul Bogdan and Chi-Ying Tsui and
Radu Marculescu",
title = "Performance Evaluation of {NoC}-Based Multicore
Systems: From Traffic Analysis to {NoC} Latency
Modeling",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "52:1--52:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2870633",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this survey, we review several approaches for
predicting performance of Network-on-Chip (NoC)-based
multicore systems, starting from the traffic models to
the complex NoC models for latency evaluation. We first
review typical traffic models to represent the
application workloads in NoC. Specifically, we review
Markovian and non-Markovian (e.g., self-similar or
long-range memory-dependent) traffic models and discuss
their applications on multicore platform design. Then,
we review the analytical techniques to predict NoC
performance under given input traffic. We investigate
analytical models for average as well as maximum delay
evaluation. We also review the developments and design
challenges of NoC simulators. One interesting research
direction in NoC performance evaluation consists of
combining simulation and analytical models in order to
exploit their advantages together. Toward this end, we
discuss several newly proposed approaches that use
hardware-based or learning-based techniques. Finally,
we summarize several open problems and our perspective
to address these challenges.",
acknowledgement = ack-nhfb,
articleno = "52",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kashif:2016:PSR,
author = "Hany Kashif and Hiren Patel and Sebastian
Fischmeister",
title = "Path Selection for Real-Time Communication on
Priority-Aware {NoCs}",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "53:1--53:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2866572",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This work investigates selecting paths for
communication flows when deploying a hard real-time
application on a chip-multiprocessor system. This
chip-multiprocessor system uses a priority-aware
real-time network-on-chip interconnect between the
processors. Given a mapping of the computation tasks
onto the chip-multiprocessor, the problem we address in
this work is to discover paths the communication flows
take such that hard real-time deadlines of flows are
met. Furthermore, we must ensure that deadlines are met
even in the presence of direct and indirect
interference from other flows sharing network links on
the path. To achieve this, our algorithm utilizes a
stage-level analysis for real-time communication to
determine the impact of a network link being used by a
flow, and its effect on other flows sharing the link.
The path selection algorithm uses heuristics such as
selecting links with least interference, and
considering lower-priority flows when dedicating links
to paths of higher-priority flows since an optimal one
is intractable. The algorithm also considers
constraints on the number of virtual channels at each
router port in the network. The statistically
significant experimental results show an improvement in
schedulability by 5\% and 12\% over existing path
selection algorithms such as Minimum Interference
Routing and Widest Shortest Path algorithms,
respectively. We also present a set-top box case study
to further illustrate the benefits of using the
proposed algorithm.",
acknowledgement = ack-nhfb,
articleno = "53",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2016:ECM,
author = "Chuangwen Liu and Peishan Tu and Pangbo Wu and Haomo
Tang and Yande Jiang and Jian Kuang and Evangeline F.
Y. Young",
title = "An Effective Chemical Mechanical Polishing Fill
Insertion Approach",
journal = j-TODAES,
volume = "21",
number = "3",
pages = "54:1--54:??",
month = jul,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2886097",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "To reduce chip-scale topography variation, dummy fill
is commonly used to improve the layout density
uniformity. Previous works either sought the most
uniform density distribution or sought to minimize the
inserted dummy fills while satisfying certain density
uniformity constraint. However, due to more stringent
manufacturing challenges, more criteria, like line
deviation and outlier, emerge at newer technology
nodes. This article presents a joint optimization
scheme to consider variation, total fill, line
deviation, outlier, overlap, and running time
simultaneously. More specifically, first we decompose
the rectilinear polygons and partition fillable regions
into rectangles for easier processing. After
decomposition, we insert dummy fills into the fillable
rectangular regions optimizing the fill metrics
simultaneously. We propose three approaches, Fast
Median approach, LP approach, and Iterative approach,
which are much faster with better quality, compared
with the results of the top three contestants in the
ICCAD Contest 2014.",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zuluaga:2016:SSN,
author = "Marcela Zuluaga and Peter Milder and Markus
P{\"u}schel",
title = "Streaming Sorting Networks",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "55:1--55:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2854150",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Sorting is a fundamental problem in computer science
and has been studied extensively. Thus, a large variety
of sorting methods exist for both software and hardware
implementations. For the latter, there is a trade-off
between the throughput achieved and the cost (i.e., the
logic and storage invested to sort $n$ elements). Two
popular solutions are bitonic sorting networks with $
O(n \log^2 n) $ logic and storage, which sort $n$
elements per cycle, and linear sorters with $ O(n) $
logic and storage, which sort $n$ elements per $n$
cycles. In this
article, we present new hardware structures that we
call streaming sorting networks, which we derive
through a mathematical formalism that we introduce, and
an accompanying domain-specific hardware generator that
translates our formal mathematical description into
synthesizable RTL Verilog. With the new networks, we
achieve novel and improved cost-performance trade-offs.
For example, assuming that $n$ is a two-power and $w$ is
any divisor of $n$, one class of these networks can sort
in $ n / w $ cycles with $ O(w \log^2 n) $ logic and $
O(n \log^2 n) $ storage; the other class that we present
sorts in $ n \log^2 n / w $ cycles with $ O(w) $ logic
and $ O(n) $ storage. We carefully analyze the performance
of these networks and their cost at three levels of
abstraction: (1) asymptotically, (2) exactly in terms
of the number of basic elements needed, and (3) in
terms of the resources required by the actual circuit
when mapped to a field-programmable gate array. The
accompanying hardware generator allows us to explore
the entire design space, identify the Pareto-optimal
solutions, and show superior cost-performance
trade-offs compared to prior work.",
acknowledgement = ack-nhfb,
articleno = "55",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhao:2016:SRE,
author = "Yue Zhao and Taeyoung Kim and Hosoon Shin and Sheldon
X.-D. Tan and Xin Li and Haibao Chen and Hai Wang",
title = "Statistical Rare-Event Analysis and Parameter Guidance
by Elite Learning Sample Selection",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "56:1--56:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2875422",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Accurately estimating the failure region of rare
events for memory-cell and analog circuit blocks under
process variations is a challenging task. In this
article, we propose a new statistical method, called
EliteScope, to estimate the circuit failure rates in
rare-event regions and to provide conditions of
parameters to achieve targeted performance. The new
method is based on the iterative blockade framework to
reduce the number of samples, but consists of two new
techniques to improve existing methods. First, the new
approach employs an elite-learning sample-selection
scheme, which can consider the effectiveness of samples
and well coverage for the parameter space. As a result,
it can reduce additional simulation costs by pruning
less effective samples while keeping the accuracy of
failure estimation. Second, the EliteScope identifies
the failure regions in terms of parameter spaces to
provide a good design guidance to accomplish the
performance target. It applies variance-based feature
selection to find the dominant parameters and then
determine the in-spec boundaries of those parameters.
We demonstrate the advantage of our proposed method
using several memory and analog circuits with different
numbers of process parameters. Experiments on four
circuit examples show that EliteScope achieves a
significant improvement on failure-region estimation in
terms of accuracy and simulation cost over traditional
approaches. The 16b 6T-SRAM column example also
demonstrates that the new method is scalable for
handling large problems with large numbers of process
variables.",
acknowledgement = ack-nhfb,
articleno = "56",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ewetz:2016:CRC,
author = "Rickard Ewetz and Cheng-Kok Koh",
title = "Construction of Reconfigurable Clock Trees for {MCMM}
Designs Using Mode Separation and Scenario
Compression",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "57:1--57:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2883609",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The clock networks of many modern circuits have to
operate in multiple corners and multiple modes (MCMM).
We propose to construct mode-reconfigurable clock trees
(MRCTs) based on mode separation and scenario
compression. The technique of scenario compression is
proposed to consider the timing constraints in multiple
scenarios at the same time, compressing the MCMM
problem into an equivalent single-corner multiple-mode
(SCMM), or single-corner single-mode (SCSM) problem.
The compression is performed by combining the skew
constraints of the different scenarios in skew
constraint graphs based on delay linearization and
dominating skew constraints. An MRCT consists of
several clock trees and mode separation involves,
depending on the active mode, selecting one of the
clock trees to deliver the clock signal. To limit the
overhead, the bottom part (closer to the clock sinks)
of all the different clock trees are shared and only
the top part (closer to the clock source) of the clock
network is mode reconfigurable. The reconfiguration is
realized using OR-gates and a one-input-multiple-output
demultiplexer. The experimental results show that for a
set of synthesized MCMM circuits, with 715 to 13,216
sequential elements, the proposed approach can achieve
high yield.",
acknowledgement = ack-nhfb,
articleno = "57",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ghasemzadeh:2016:HAE,
author = "Hassan Ghasemzadeh and Ramin Fallahzadeh and Roozbeh
Jafari",
title = "A Hardware-Assisted Energy-Efficient Processing Model
for Activity Recognition Using Wearables",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "58:1--58:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2886096",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Wearables are being widely utilized in health and
wellness applications, primarily due to the recent
advances in sensor and wireless communication, which
enhance the promise of wearable systems in providing
continuous and real-time monitoring and interventions.
Wearables are generally composed of hardware/software
components for collection, processing, and
communication of physiological data. Practical
implementation of wearable monitoring in real-life
applications is currently limited due to notable
obstacles. The wearability and form factor are
dominated by the amount of energy needed for sensing,
processing, and communication. In this article, we
propose an ultra-low-power granular decision-making
architecture, also called screening classifier, which
can be viewed as a tiered wake-up circuitry, consuming
three orders of magnitude less power than the
state-of-the-art low-power microcontrollers. This
processing model operates based on computationally
simple template matching modules, based on coarse- to
fine-grained analysis of the signals with on-demand and
gradually increasing the processing power consumption.
Initial template matching rejects signals that are
clearly not of interest from the signal processing
chain, keeping the rest of processing blocks idle. If
the signal is likely of interest, the sensitivity and
the power of the template matching modules are
gradually increased, and ultimately, the main
processing unit is activated. We pose optimization
techniques to efficiently split a full template into
smaller bins, called mini-templates, and activate only
a subset of bins during each classification decision.
Our experimental results on real data show that this
signal screening model reduces power consumption of the
processing architecture by a factor of 70\% while the
sensitivity of detection remains at least 80\%.",
acknowledgement = ack-nhfb,
articleno = "58",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Teman:2016:PAP,
author = "Adam Teman and Davide Rossi and Pascal Meinerzhagen
and Luca Benini and Andreas Burg",
title = "Power, Area, and Performance Optimization of Standard
Cell Memory Arrays Through Controlled Placement",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "59:1--59:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2890498",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Embedded memory remains a major bottleneck in current
integrated circuit design in terms of silicon area,
power dissipation, and performance; however, static
random access memories (SRAMs) are almost exclusively
supplied by a small number of vendors through memory
generators, targeted at rather generic design
specifications. As an alternative, standard cell
memories (SCMs) can be defined, synthesized, and placed
and routed as an integral part of a given digital
system, providing complete design flexibility, good
energy efficiency, low-voltage operation, and even area
efficiency for small memory blocks. Yet implementing an
SCM block with a standard digital flow often fails to
exploit the distinct and regular structure of such an
array, leaving room for optimization. In this article,
we present a design methodology for optimizing the
physical implementation of SCM macros as part of the
standard design flow. This methodology introduces
controlled placement, leading to a structured,
noncongested layout with close to 100\% placement
utilization, resulting in a smaller silicon footprint,
reduced wire length, and lower power consumption
compared to SCMs without controlled placement. This
methodology is demonstrated on SCM macros of various
sizes and aspect ratios in a state-of-the-art 28 nm
fully depleted silicon-on-insulator technology, and
compared with equivalent macros designed with the
noncontrolled, standard flow, as well as with
foundry-supplied SRAM macros. The controlled SCMs
provide an average 25\% reduction in area as compared
to noncontrolled implementations while achieving a
smaller size than SRAM macros of up to 1 Kbyte. Power
and performance comparisons of controlled SCM blocks of
a commonly found 256 $ \times $ 32 (1 Kbyte) memory
with foundry-provided SRAMs show greater than 65\% and
10\% reduction in read and write power, respectively,
while providing faster access than their SRAM
counterparts, despite being of an aspect ratio that is
typically unfavorable for SCMs. In addition, the SCM
blocks function correctly with a supply voltage as low
as 0.3 V, well below the lower limit of even the SRAM
macros optimized for low-voltage operation. The
controlled placement methodology is applied within a
full-chip physical implementation flow of an
OpenRISC-based test chip, providing more than 50\%
power reduction compared to equivalently sized compiled
SRAMs under a benchmark application.",
acknowledgement = ack-nhfb,
articleno = "59",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Narayanaswamy:2016:BRE,
author = "Swaminathan Narayanaswamy and Steffen Schlueter and
Sebastian Steinhorst and Martin Lukasiewycz and
Samarjit Chakraborty and Harry Ernst Hoster",
title = "On Battery Recovery Effect in Wireless Sensor Nodes",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "60:1--60:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2890501",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the perennial demand for longer runtime of
battery-powered Wireless Sensor Nodes (WSNs), several
techniques have been proposed to increase the battery
runtime. One such class of techniques exploiting the
battery recovery effect phenomenon claims that
performing an intermittent discharge instead of a
continuous discharge will increase the usable battery
capacity. Several works in the areas of embedded
systems and wireless sensor networks have assumed the
existence of this recovery effect and proposed
different power management techniques in the form of
power supply architectures (multiple battery setup) and
communication protocols (burst mode transmission) in
order to exploit it. However, until now, a systematic
experimental evaluation of the recovery effect has not
been performed with real battery cells, using
high-accuracy battery testers to confirm the existence
of this recovery phenomenon. In this article, a
systematic evaluation procedure is developed to verify
the existence of this battery recovery effect. Using
our evaluation procedure, we investigated Alkaline,
Nickel-Metal Hydride (NiMH), and Lithium-Ion (Li-Ion)
battery chemistries, which are commonly used as power
supplies for Wireless Sensor Node (WSN) applications.
Our experimental results do not show any evidence of
the aforementioned recovery effect in these battery
chemistries. In particular, our results show a
significant deviation from the stochastic battery
models, which were used by many power management
techniques. Therefore, the existing power management
approaches that rely on this recovery effect do not
hold in practice. Instead of a battery recovery effect,
our experimental results show the existence of the rate
capacity effect, which is the reduction of usable
battery capacity with higher discharge power, to be the
dominant electrochemical phenomenon that should be
considered for maximizing the runtime of WSN
applications. We outline power management techniques
that minimize the rate capacity effect in order to
obtain a higher energy output from the battery.",
acknowledgement = ack-nhfb,
articleno = "60",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Tannir:2016:AMN,
author = "Dani Tannir and Ya Wang and Peng Li",
title = "Accurate Modeling of Nonideal Low-Power {PWM} {DC--DC}
Converters Operating in {CCM} and {DCM} using Enhanced
Circuit-Averaging Techniques",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "61:1--61:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2890500",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The development of enhanced modeling techniques for
the simulation of switched-mode Pulse Width Modulated
(PWM) DC-DC power converters using circuit averaging is
the main focus of this article. The circuit-averaging
technique has traditionally been used to model the
behavior of PWM DC-DC converters without considering
important nonideal characteristics of the switching
devices. As a result, most of these existing approaches
present simplified models that are ideal or linearized,
and do not accurately account for the performance
characteristics of the converter. This is especially
problematic for low-power applications. In this
article, we present an enhanced nonideal behavioral
circuit-averaged model that makes the simulation of
DC-DC converters both computationally efficient and
accurate, thereby presenting an important tool for
circuit designers. Experimentally, we show that our
Verilog-A-based new model allows for accurate
simulation of both Buck- and Boost-type PWM converters
operating in either CCM or DCM modes while providing
more than one order of magnitude speedup over the
transistor-level simulation.",
acknowledgement = ack-nhfb,
articleno = "61",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Steinhorst:2016:CPC,
author = "Sebastian Steinhorst and Matthias Kauer and Arne Meeuw
and Swaminathan Narayanaswamy and Martin Lukasiewycz
and Samarjit Chakraborty",
title = "Cyber-Physical Co-Simulation Framework for Smart Cells
in Scalable Battery Packs",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "62:1--62:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2891407",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article introduces a Cyber-physical Co-Simulation
Framework (CPCSF) for design and analysis of smart
cells that enable scalable battery pack and Battery
Management System (BMS) architectures. In contrast to
conventional cells in battery packs, where all cells
are monitored and controlled centrally, each smart cell
is equipped with its own electronics in the form of a
Cell Management Unit (CMU). The CMU maintains the cell
in a safe and healthy operating state, while
system-level battery management functions are performed
by cooperation of the smart cells via communication.
Here, the smart cells collaborate in a self-organizing
fashion without a central controller instance. This
enables maximum scalability and modularity,
significantly simplifying integration of battery packs.
However, for this emerging architecture, system-level
design methodologies and tools have not been
investigated yet. By contrast, components are developed
individually and then manually tested in a hardware
development platform. Consequently, the systematic
design of the hardware/software architecture of smart
cells requires a cyber-physical multi-level
co-simulation of the network of smart cells that has to
include all the components from the software,
electronic, electric, and electrochemical domains. This
comprises distributed BMS algorithms running on the
CMUs, the communication network, control circuitry,
cell balancing hardware, and battery cell behavior. For
this purpose, we introduce a CPCSF that enables rapid
design and analysis of smart cell hardware/software
architectures. Our framework is then applied to
investigate request-driven active cell balancing
strategies that make use of the decentralized system
architecture. In an exhaustive analysis on a realistic
21.6 kWh Electric Vehicle (EV) battery pack containing
96 smart cells in series, the CPCSF is able to simulate
hundreds of balancing runs together with all system
characteristics, using the proposed request-driven
balancing strategies at highest accuracy within an
overall time frame of several hours. Consequently, the
presented CPCSF for the first time allows us to
quantitatively and qualitatively analyze the behavior
of smart cell architectures for real-world
applications.",
acknowledgement = ack-nhfb,
articleno = "62",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Guin:2016:FCS,
author = "Ujjwal Guin and Qihang Shi and Domenic Forte and Mark
M. Tehranipoor",
title = "{FORTIS}: a Comprehensive Solution for Establishing
Forward Trust for Protecting {IPs} and {ICs}",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "63:1--63:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2893183",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the advent of globalization in the semiconductor
industry, it is necessary to prevent unauthorized usage
of third-party IPs (3PIPs), cloning and unwanted
modification of 3PIPs, and unauthorized production of
ICs. Due to the increasing complexity of ICs,
system-on-chip (SoC) designers use various 3PIPs in
their design to reduce time-to-market and development
costs, which creates a trust issue between the SoC
designer and the IP owners. In addition, as the ICs are
fabricated around the globe, the SoC designers give
fabrication contracts to offshore foundries to
manufacture ICs and have little control over the
fabrication process, including the total number of
chips fabricated. Similarly, the 3PIP owners lack
control over the number of fabricated chips and/or the
usage of their IPs in an SoC. Existing research only
partially addresses the problems of IP piracy and IC
overproduction, and to the best of our knowledge, there
is no work that considers IP overuse. In this article,
we present a comprehensive solution for preventing IP
piracy and IC overproduction by assuring forward trust
between all entities involved in the SoC design and
fabrication process. We propose a novel design flow to
prevent IC overproduction and IP overuse. We use an
existing logic encryption technique to obfuscate the
netlist of an SoC or a 3PIP and propose a modification
to enable manufacturing tests before the activation of
chips which is absolutely necessary to prevent
overproduction. We have used asymmetric and symmetric
key encryption, in a fashion similar to Pretty Good
Privacy (PGP), to transfer keys from the SoC designer
or 3PIP owners to the chips. In addition, we also
propose to attach an IP digest (a cryptographic hash of
the entire IP) to the header of an IP to prevent
modification of the IP by the SoC designers. We have
shown that our approach is resistant to various attacks
with the cost of minimal area overhead.",
acknowledgement = ack-nhfb,
articleno = "63",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2016:TPD,
author = "William Lee and Vikas S. Vij and Kenneth S. Stevens",
title = "Timing Path-Driven Cycle Cutting for Sequential
Controllers",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "64:1--64:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2893473",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power and performance optimization of integrated
circuits is performed by timing-driven algorithms that
operate on directed acyclic graphs. Sequential circuits
and circuits with topological feedback contain cycles.
Cyclic circuits must be represented as directed acyclic
graphs to be optimized and evaluated using static
timing analysis. Algorithms in commercial electronic
design automation tools generate the required acyclic
graphs by cutting cycles without considering timing
paths. This work reports on a method for generating
directed acyclic circuit graphs that do not cut the
specified timing paths. The algorithm is applied to
over 125 benchmark designs and asynchronous handshake
controllers. The runtime is less than 1 second, even
for the largest published controllers. Circuit
timing graphs generated using this method retain the
necessary timing paths, which enables circuit
validation and optimization employing the commercial
tools. Additional benefits show these designs are on an
average a third in size, operate 33.3\% faster, and
consume one-fourth the energy.",
acknowledgement = ack-nhfb,
articleno = "64",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xu:2016:HSL,
author = "Yang Xu and J{\"u}rgen Teich",
title = "Hierarchical Statistical Leakage Analysis and Its
Application",
journal = j-TODAES,
volume = "21",
number = "4",
pages = "65:1--65:??",
month = sep,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2896820",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Sep 23 15:16:20 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we investigate a hierarchical
statistical leakage analysis (HSLA) design flow where
module-level statistical leakage models supplied by IP
vendors are used to improve the efficiency and capacity
of SoC statistical leakage power analysis. To solve the
challenges of incorporating spatial correlations
between IP modules at system level, we first propose a
method to extract correlation-inclusive leakage models.
Then a method to handle the spatial correlations at
system level is proposed. Using this method, the
runtime of system statistical leakage analysis (SLA)
can be significantly improved without disclosing the
netlists of the IP modules. Experimental results
demonstrate that the proposed HSLA method is about 100
times faster than gate-level full-chip SLA methods
while maintaining the accuracy. In addition, we also
investigate one application of this HSLA method, a
leakage-yield-driven floorplanning framework, to
demonstrate the benefits of such an HSLA method in
practice. Moreover, an optimized hierarchical leakage
analysis method dedicated to the floorplanning
framework is proposed. The effectiveness of the
floorplanning framework and the optimized method are
confirmed by extensive experimental results.",
acknowledgement = ack-nhfb,
articleno = "65",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{S:2016:EAD,
  author =       "Ramprasath S. and Vinita Vasudevan",
  title =        "Efficient Algorithms for Discrete Gate Sizing and
                 Threshold Voltage Assignment Based on an Accurate
                 Analytical Statistical Yield Gradient",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "66:1--66:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2896819",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In this article, we derive a simple and accurate
                 expression for the change in timing yield due to a
                 change in the gate delay distribution. It is based on
                 analytical bounds that we have derived for the moments
                 of the circuit and path delay. Based on this, we
                 propose computationally efficient algorithms for (1)
                 discrete gate sizing and (2) simultaneous gate sizing
                 and threshold voltage ($V_T$) assignment so that the
                 circuit meets a timing yield specification under
                 parameter variations. The use of this analytical yield
                 gradient within a gradient-based timing yield
                 optimization algorithm results in a significant
                 improvement in the runtime as compared to the numerical
                 method, while achieving the same final yield. It also
                 allows us to explore a larger search space in each
                 iteration more efficiently, which is required in the
                 case of simultaneous resizing and $V_T$ assignment. We
                 also propose heuristics for resizing/changing the $V_T$
                 of multiple gates in each iteration. This makes it
                 possible to optimize the timing yield for large
                 circuits. Results on ITC '99 benchmarks show that the
                 proposed multinode resizing algorithm results in a
                 significant improvement in the runtime with a marginal
                 average area penalty and no cost to the final yield
                 achieved.",
  acknowledgement = ack-nhfb,
  articleno =    "66",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2016:ERL,
  author =       "Hongfei Wang and R. D. (Shawn) Blanton",
  title =        "Ensemble Reduction via Logic Minimization",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "67:1--67:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2897515",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "An ensemble of machine learning classifiers usually
                 improves generalization performance and is useful for
                 many applications. However, the extra memory storage
                 and computational cost incurred from the combined
                 models often limits their potential applications. In
                 this article, we propose a new ensemble reduction
                 method called CANOPY that significantly reduces memory
                 storage and computations. CANOPY uses a technique from
                 logic minimization for digital circuits to select and
                 combine particular classification models from an
                 initial pool in the form of a Boolean function, through
                 which the reduced ensemble performs classification.
                 Experiments on 20 UCI datasets demonstrate that CANOPY
                 either outperforms or is very competitive with the
                 initial ensemble and one state-of-the-art ensemble
                 reduction method in terms of generalization error, and
                 is superior to all existing reduction methods surveyed
                 for identifying the smallest numbers of models in the
                 reduced ensembles.",
  acknowledgement = ack-nhfb,
  articleno =    "67",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2016:DTS,
  author =       "Irith Pomeranz",
  title =        "{$N$}-Detection Test Sets for Circuits with Multiple
                 Independent Scan Chains",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "68:1--68:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2897514",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In a circuit with multiple independent scan chains, it
                 is possible to operate groups of scan chains
                 independently in functional or shift mode. This
                 design-for-testability approach can be used to increase
                 the quality of a test set. This article describes an
                 $N$-detection test generation procedure for increasing
                 the quality of a transition fault test set in such a
                 circuit. The procedure uses the possibility of applying
                 the same test, with the scan chains operating in
                 different modes, to increase the numbers of detections
                 without increasing the number of tests that need to be
                 generated or stored on a tester. This results in
                 reduced input storage requirements compared with a
                 conventional $N$-detection test set and an increased
                 number of applied tests. The increased quality of the
                 test set is verified by its bridging fault coverage.",
  acknowledgement = ack-nhfb,
  articleno =    "68",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Won:2016:RSC,
  author =       "Jae-Yeon Won and Paul V. Gratz and Srinivas Shakkottai
                 and Jiang Hu",
  title =        "Resource Sharing Centric Dynamic Voltage and Frequency
                 Scaling for {CMP} Cores, Uncore, and Memory",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "69:1--69:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2897394",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "With the breakdown of Dennard's scaling over the past
                 decade, performance growth of modern microprocessor
                 design has largely relied on scaling core count in chip
                 multiprocessors (CMPs). The challenge of chip power
                 density, however, remains and demands new power
                 management solutions. This work investigates a
                 coordinated CMP systemwide Dynamic Voltage and
                 Frequency Scaling (DVFS) policy centered around shared
                 resource utilization. This approach represents a new
                 angle on the problem, differing from the conventional
                 core-workload-driven approaches. The key component of
                 our work is per-core DVFS leveraging a technique
                 similar to TCP Vegas congestion control from
                 networking. This TCP Vegas-based DVFS can potentially
                 identify the synergy between power reduction and
                 performance improvement. Further, this work includes
                 uncore (on-chip interconnect and shared last level
                 cache) and main memory DVFS policies coordinated with
                 the per-core DVFS policy. Full system simulations on
                 PARSEC benchmarks show that our technique reduces total
                 energy dissipation by over 47\% across all benchmarks
                 with less than 2.3\% performance degradation. Our work
                 also leads to 12\% more energy savings compared to a
                 prior work CMP DVFS policy.",
  acknowledgement = ack-nhfb,
  articleno =    "69",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ho:2016:AAD,
  author =       "Ching-Hsuan Ho and Yung-Chih Chen and Chun-Yao Wang
                 and Ching-Yi Huang and Suman Datta and Vijaykrishnan
                 Narayanan",
  title =        "Area-Aware Decomposition for Single-Electron
                 Transistor Arrays",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "70:1--70:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2898998",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Single-electron transistor (SET) at room temperature
                 has been demonstrated as a promising device for
                 extending Moore's law due to its ultra-low power
                 consumption. Existing SET synthesis methods synthesize
                 a Boolean network into a large reconfigurable SET array
                 where the height of SET array equals the number of
                 primary inputs. However, recent experiments on device
                 level have shown that this height is restricted to a
                 small number, say, 10, rather than arbitrary value due
                 to the ultra-low driving strength of SET devices. On
                 the other hand, the width of an SET array is also
                 suggested to be a small value. Consequently, it is
                 necessary to decompose a large SET array into a set of
                 small SET arrays where each of them realizes a
                 sub-function of the original circuit with no more than
                 10 inputs. Thus, this article presents two techniques
                 for achieving area-efficient SET array decomposition:
                 One is a width minimization algorithm for reducing the
                 area of a single SET array; the other is a
                 depth-bounded mapping algorithm, which decomposes a
                 Boolean network into many sub-functions such that the
                 widths of the corresponding SET arrays are balanced.
                 The width minimization algorithm leads to a 25\%--41\%
                 improvement compared to the state of the art, and the
                 mapping algorithm achieves a 60\% reduction in total
                 area compared to a na{\"\i}ve approach.",
  acknowledgement = ack-nhfb,
  articleno =    "70",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mao:2016:LBP,
  author =       "Fubing Mao and Yi-Chung Chen and Wei Zhang and Hai
                 (Helen) Li and Bingsheng He",
  title =        "Library-Based Placement and Routing in {FPGAs} with
                 Support of Partial Reconfiguration",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "71:1--71:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2901295",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "While traditional Field-Programmable Gate Array design
                 flow usually employs fine-grained tile-based placement,
                 modular placement is increasingly required to speed up
                 the large-scale placement and save the synthesis time.
                 Moreover, the commonly used modules can be
                 pre-synthesized and stored in the library for design
                 reuse to significantly save the design, verification
                 time, and development cost. Previous work mainly
                 focuses on modular floorplanning without module
                 placement information. In this article, we propose a
                 library-based placement and routing flow that best
                 utilizes the pre-placed and routed modules from the
                 library to significantly save the execution time while
                 achieving the minimal area-delay product. The flow
                 supports the static and reconfigurable modules at the
                 same time. The modular information is represented in
                 the B*-Tree structure, and the B*-Tree operations are
                 amended together with Simulated Annealing to enable a
                 fast search of the placement space. Different
                 width-height ratios of the modules are exploited to
                 achieve area-delay product optimization. Partial
                 reconfiguration-aware routing using pin-to-wire
                 abutment is proposed to connect the modules after
                 placement. Our placer can reduce the compilation time
                 by 65\% on average with 17\% area and 8.2\% delay
                 overhead compared with the fine-grained results of
                 Versatile Place and Route through the reuse of module
                 information in the library for the base architecture.
                 For other architectures, the area increase ranges from
                 8.32\% to 25.79\%, the delay varies from $-$13.66\% to
                 19.79\%, and the runtime improves by 43.31\% to
                 77.2\%.",
  acknowledgement = ack-nhfb,
  articleno =    "71",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bernasconi:2016:IRZ,
  author =       "Anna Bernasconi and Valentina Ciriani",
  title =        "Index-Resilient Zero-Suppressed {BDDs}: Definition and
                 Operations",
  journal =      j-TODAES,
  volume =       "21",
  number =       "4",
  pages =        "72:1--72:??",
  month =        sep,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2905363",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Sep 23 15:16:20 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Zero-Suppressed Binary Decision Diagrams (ZDDs) are
                 widely used data structures for representing and
                 handling combination sets and Boolean functions. In
                 particular, ZDDs are commonly used in CAD for the
                 synthesis and verification of integrated circuits. The
                 purpose of this article is to design an error-resilient
                 version of this data structure: a self-repairing ZDD.
                 More precisely, we design a new ZDD canonical form,
                 called index-resilient reduced ZDD, such that a faulty
                 index can be reconstructed in time $O(k)$, where $k$ is
                 the number of nodes with a corrupted index. Moreover,
                 we propose new versions of the standard algorithms for
                 ZDD manipulation and construction that are error
                 resilient during their execution and produce an
                 index-resilient ZDD as output. The experimental results
                 validate the proposed approach.",
  acknowledgement = ack-nhfb,
  articleno =    "72",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems (TODAES)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2016:HDT,
  author =       "Hai Wang and Jian Ma and Sheldon X.-D. Tan and Chi
                 Zhang and He Tang and Keheng Huang and Zhenghong
                 Zhang",
  title =        "Hierarchical Dynamic Thermal Management Method for
                 High-Performance Many-Core Microprocessors",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "1:1--1:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2891409",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "It is challenging to manage the thermal behavior of
                 many-core microprocessors while still keeping them
                 running at high performance since the control
                 complexity increases as the core number increases. In
                 this article, a novel hierarchical dynamic thermal
                 management method is proposed to overcome this
                 challenge. The new method employs model predictive
                 control (MPC) with task migration and a DVFS scheme to
                 ensure smooth control behavior and negligible computing
                 performance sacrifice. In order to be scalable to
                 many-core systems, the hierarchical control scheme is
                 designed with two levels. At the lower level, the cores
                 are spatially clustered into blocks, and local task
                 migration is used to match current power distribution
                 with the optimal distribution calculated by MPC. At the
                 upper level, global task migration is used with the
                 unmatched powers from the lower level. A modified
                 iterative minimum cut algorithm is used to assist the
                 task migration decision making if the power number is
                 large at the upper level. Finally, DVFS is applied to
                 regulate the remaining unmatched powers. Experiments
                 show that the new method outperforms existing methods
                 and is very scalable to manage many-core
                 microprocessors with small performance degradation.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Poddar:2016:ECS,
  author =       "Sudip Poddar and Sarmishtha Ghoshal and Krishnendu
                 Chakrabarty and Bhargab B. Bhattacharya",
  title =        "Error-Correcting Sample Preparation with Cyberphysical
                 Digital Microfluidic Lab-on-Chip",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2898999",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Digital (droplet-based) microfluidic technology offers
                 an attractive platform for implementing a wide variety
                 of biochemical laboratory protocols, such as
                 point-of-care diagnosis, DNA analysis, target
                 detection, and drug discovery. A digital microfluidic
                 biochip consists of a patterned array of electrodes on
                 which tiny fluid droplets are manipulated by electrical
                 actuation sequences to perform various fluidic
                 operations, for example, dispense, transport, mix, or
                 split. However, because of the inherent uncertainty of
                 fluidic operations, the outcome of biochemical
                 experiments performed on-chip can be erroneous even if
                 the chip is tested a priori and deemed to be
                 defect-free. In this article, we address an important
                 error recoverability problem in the context of sample
                 preparation. We assume a cyberphysical environment, in
                 which the physical errors, when detected online at
                 selected checkpoints with integrated sensors, can be
                 corrected through recovery techniques. However, almost
                 all prior work on error recoverability used
                 checkpointing-based rollback approach, that is,
                 re-execution of certain portions of the protocol
                 starting from the previous checkpoint. Unfortunately,
                 such techniques are expensive both in terms of assay
                 completion time and reagent cost, and can never ensure
                 full error-recovery in deterministic sense. We consider
                 imprecise droplet mix-split operations and present a
                 novel roll-forward approach where the erroneous
                 droplets, thus produced, are used in the error-recovery
                 process, instead of being discarded or remixed. All
                 erroneous droplets participate in the dilution process
                 and they mutually cancel or reduce the
                 concentration-error when the target droplet is reached.
                 We also present a rigorous analysis that reveals the
                 role of volumetric-error on the concentration of a
                 sample to be prepared, and we describe the layout of a
                 lab-on-chip that can execute the proposed cyberphysical
                 dilution algorithm. Our analysis reveals that fluidic
                 errors caused by unbalanced droplet splitting can be
                 classified as being either critical or non-critical,
                 and only those of the former type require correction to
                 achieve error-free sample dilution. Simulation
                 experiments on various sample preparation test cases
                 demonstrate the effectiveness of the proposed method.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Czerwinski:2016:SAO,
  author =       "Robert Czerwinski and Dariusz Kania",
  title =        "State Assignment and Optimization of Ultra-High-Speed
                 {FSMs} Utilizing Tristate Buffers",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2905366",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "The logic synthesis of ultra-high-speed FSMs is
                 presented. The state assignment is based on a
                 well-known method that uses output vectors. This
                 technique is adjusted to include elements of two-level
                 minimization and takes into account the limited number
                 of terms contained in the programmable-AND/fixed-OR
                 logic cell. The state assignment is based on a special
                 form of the binary decision tree. The second phase of
                 the FSM design is logic optimization. The optimization
                 method is based on tristate buffers, thus making
                 possible a one-logic-level FSM structure. The key point
                 is to search partition variables that control the
                 tristate buffers. This technique can also be applied to
                 combinational circuits or the output block of FSMs
                 only. Algorithms for state assignment and optimization
                 are presented and richly illustrated by examples. The
                 method is dedicated to using specific features of
                 complex programmable logic devices. Experimental
                 results prove its effectiveness (e.g., the
                 implementation of the 16-bit counter requires 136
                 logic cells and one-logic-cell level instead of 213
                 cells and four levels). The optimization method using
                 tristate buffers and a state assignment binary decision
                 tree can be directly applied to FPGA-dedicated logic
                 synthesis.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Das:2016:FBP,
  author =       "Shirshendu Das and Hemangee K. Kapoor",
  title =        "A Framework for Block Placement, Migration, and Fast
                 Searching in Tiled-{DNUCA} Architecture",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2907946",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Multicore processors have proliferated several domains
                 ranging from small-scale embedded systems to large data
                 centers, making tiled CMPs (TCMPs) the essential
                 next-generation scalable architecture. NUCA
                 architectures help in managing the capacity and access
                 time for such larger cache designs. It divides the
                 last-level cache (LLC) into multiple banks connected
                 through an on-chip network. Static NUCA (SNUCA) has a
                 fixed address mapping policy, whereas dynamic NUCA
                 (DNUCA) allows blocks to relocate nearer to the
                 processing cores at runtime. To allow this, DNUCA
                 divides the banks into multiple banksets and a block
                 can be placed in any bank within a particular bankset.
                 The entire bankset may need to be searched to access a
                 block. Optimal bankset searching mechanisms are
                 essential for getting the benefits from DNUCA. This
                 article proposes a DNUCA-based TCMP architecture called
                 TLD-NUCA. It reduces the LLC access time of TCMP and
                 also allows a heavily loaded bank to distribute its
                 load among the underused banks. Instead of other DNUCA
                 designs, TLD-NUCA considers larger banksets. Such
                 relaxations result in more uniform load distribution
                 than existing DNUCA-based TCMP (T-DNUCA). Considering
                 larger banksets improves the utilization factor, but
                 T-DNUCA cannot implement it because of its expensive
                 searching mechanism. TLD-NUCA uses a centralized
                 directory, called TLD, to search a block from all the
                 banks. Also, the proposed block placement policy
                 reduces the instances when the central TLD needs to be
                 contacted. It does not require the expensive
                 simultaneous search as needed by T-DNUCA. Better cache
                 utilization and a reduction in LLC access time improve
                 the miss rate as well as the average memory access time
                 (AMAT). Improving the miss rate and AMAT results in
                 improvements in cycles per instructions (CPI).
                 Experimental analysis found that TLD-NUCA improves
                 performance by 6.5\% as compared to T-DNUCA. The
                 improvement is 13\% as compared to the SNUCA-based TCMP
                 design.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wu:2016:OAW,
  author =       "Yu-Wei Wu and Yiyu Shi and Sudip Roy and Tsung-Yi Ho",
  title =        "Obstacle-Avoiding Wind Turbine Placement for Power
                 Loss and Wake Effect Optimization",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2905365",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "As finite energy resources are being consumed at
                 faster rate than they can be replaced, renewable energy
                 resources have drawn extensive attention. Wind power
                 development is one such example growing significantly
                 throughout the world. The main difficulty in wind power
                 development is that wind turbines interfere with each
                 other. The produced turbulence---wake effect---directly
                 reduces the power generation. In addition, wirelength
                 of the collection network among wind turbines is not
                 merely an economic factor; it also decides power loss
                 on the wind farm. Moreover, in reality, obstacles
                 (buildings, lakes, etc.) exist on the wind farm, which
                 are unavoidable. Nevertheless, to the best of our
                 knowledge, none of the existing works consider wake
                 effect, wirelength, and avoidance of obstacles all
                 together in the wind turbine placement problem. In this
                 article, we propose an analytical method to obtain the
                 obstacle-avoiding placement of wind turbines, thus
                 minimizing both power loss and wake effect. We also
                 propose a postprocessing method to fine-tune the
                 solution obtained from the analytical method to find a
                 better solution. Simulation results show that our tool
                 is 12x faster than the state-of-the-art industrial tool
                 AWS OpenWind and 203x faster than the state-of-the-art
                 academic tool TDA with almost the same produced
                 power.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xiao:2016:HTL,
  author =       "K. Xiao and D. Forte and Y. Jin and R. Karri and S.
                 Bhunia and M. Tehranipoor",
  title =        "Hardware {Trojans}: Lessons Learned after One Decade
                 of Research",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2906147",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Given the increasing complexity of modern electronics
                 and the cost of fabrication, entities from around the
                 globe have become more heavily involved in all phases
                 of the electronics supply chain. In this environment,
                 hardware Trojans (i.e., malicious modifications or
                 inclusions made by untrusted third parties) pose major
                 security concerns, especially for those integrated
                 circuits (ICs) and systems used in critical
                 applications and cyber infrastructure. While hardware
                 Trojans have been explored significantly in academia
                 over the last decade, there remains room for
                 improvement. In this article, we examine the research
                 on hardware Trojans from the last decade and attempt to
                 capture the lessons learned. A comprehensive
                 adversarial model taxonomy is introduced and used to
                 examine the current state of the art. Then the past
                 countermeasures and publication trends are categorized
                 based on the adversarial model and topic. Through this
                 analysis, we identify what has been covered and the
                 important problems that are underinvestigated. We also
                 identify the most critical lessons for those new to the
                 field and suggest a roadmap for future hardware Trojan
                 research.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2016:PSS,
  author =       "Irith Pomeranz",
  title =        "Periodic Scan-In States to Reduce the Input Test Data
                 Volume for Partially Functional Broadside Tests",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "7:1--7:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2911983",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article describes a procedure for test data
                 compression targeting functional and partially
                 functional broadside tests. The scan-in state of such a
                 test is either a reachable state or has a known Hamming
                 distance from a reachable state. Reachable states are
                 fully specified, while the popular LFSR-based test
                 data compression methods require the use of
                 incompletely specified test cubes. The test data
                 compression approach considered in this article is
                 based on the use of periodic scan-in states. Such
                 states require the storage of a period that can be
                 significantly shorter than a scan-in state, thus
                 providing test data compression. The procedure computes
                 a set of periods that is sufficient for detecting all
                 the detectable target faults. Considering the scan-in
                 states that the periods produce, the procedure ranks
                 the periods based on the distances of the scan-in
                 states from reachable states, and the lengths of the
                 periods. Functional and partially functional broadside
                 tests are generated preferring shorter periods with
                 smaller Hamming distances. The results are compared
                 with those of an LFSR-based approach.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2016:ESM,
  author =       "Jinyong Lee and Ingoo Heo and Yongje Lee and Yunheung
                 Paek",
  title =        "Efficient Security Monitoring with the Core Debug
                 Interface in an Embedded Processor",
  journal =      j-TODAES,
  volume =       "22",
  number =       "1",
  pages =        "8:1--8:??",
  month =        dec,
  year =         "2016",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2907611",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jul 21 10:49:29 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "For decades, various concepts in security monitoring
                 have been proposed. In principle, they all in common in
                 regard to the monitoring of the execution behavior of a
                 program (e.g., control-flow or dataflow) running on the
                 machine to find symptoms of attacks. Among the proposed
                 monitoring schemes, software-based ones are known for
                 their adaptability on the commercial products, but
                 there have been concerns that they may suffer from
                 nonnegligible runtime overhead. On the other hand,
                 hardware-based solutions are recognized for their high
                 performance. However, most of them have an inherent
                 problem in that they usually mandate drastic changes to
                 the internal processor architecture. More recent ones
                 have strived to minimize such modifications by
                 employing external hardware security monitors in the
                 system. However, these approaches intrinsically suffer
                 from the overhead caused by communication between the
                 host and the external monitor. Our solution also relies
                 on external hardware for security monitoring, but
                 unlike the others, ours tackles the communication
                 overhead by using the core debug interface (CDI), which
                 is readily available in most commercial processors for
                 debugging. We build our system simply by plugging our
                 monitoring hardware into the processor via CDI,
                 precluding the need for altering the processor
                 internals. To validate the effectiveness of our
                 approach, we implement two well-known monitoring
                 techniques on our proposed framework: dynamic
                 information flow tracking and branch regulation. The
                 experimental results on our FPGA prototype show that
                 our external hardware monitors efficiently perform
                 monitoring tasks with negligible performance overhead,
                 mainly with thanks to the support of CDI, which helps
                 us reduce communication costs substantially.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2016:IPE,
author = "Yu-Ming Chang and Pi-Cheng Hsiu and Yuan-Hao Chang and
Chi-Hao Chen and Tei-Wei Kuo and Cheng-Yuan Michael
Wang",
title = "Improving {PCM} Endurance with a Constant-Cost Wear
Leveling Design",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "9:1--9:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2905364",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Improving PCM endurance is a fundamental issue when it
is considered as an alternative to replace DRAM as main
memory. Memory-based wear leveling (WL) is an effective
way to improve PCM endurance, but its major challenge
is how to efficiently determine the appropriate memory
pages for allocation or swapping. In this article, we
present a constant-cost WL design that is compatible
with existing memory management. Two implementations,
namely bucket-based and array-based WL, with
constant-time (or nearly zero) search cost are proposed
to be integrated into the OS layer and the hardware
layer, respectively, as well as to trade between time
and space complexity. The results of experiments
conducted based on an implementation in Android, as
well as simulations with popular benchmarks, to
evaluate the effectiveness of the proposed design are
very encouraging.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{He:2016:RIM,
author = "Xu He and Yao Wang and Yang Guo and Evangeline F. Y.
Young",
title = "{Ripple 2.0}: Improved Movement of Cells in
Routability-Driven Placement",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "10:1--10:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2925989",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Routability is one of the most important problems in
high-performance circuit designs. From the viewpoint of
placement design, two major factors cause routing
congestion: (i) interconnections between cells and (ii)
connections on macro blockages. In this article, we
present a routability-driven placer, Ripple 2.0, which
emphasizes both kinds of routing congestion. Several
techniques will be presented, including (i) cell
inflation with routing path consideration, (ii)
congested cluster optimization, (iii)
routability-driven cell spreading, and (iv)
simultaneous routing and placement for routability
refinement. With the official evaluation protocol,
Ripple 2.0 outperforms other published academic
routability-driven placers. Compared with top results
in the ICCAD 2012 contest, Ripple 2.0 achieves a better
detailed routing solution obtained by a commercial
router.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mazumdar:2016:CIS,
author = "Bodhisatwa Mazumdar and Sk. Subidh Ali and Ozgur
Sinanoglu",
title = "A Compact Implementation of {Salsa20} and Its Power
Analysis Vulnerabilities",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "11:1--11:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2934677",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we present a compact implementation
of the Salsa20 stream cipher that is targeted towards
lightweight cryptographic devices such as
radio-frequency identification (RFID) tags. The Salsa20
stream cipher, an addition-rotation-XOR (ARX) cipher,
is used for high-security cryptography in NEON
instruction sets embedded in ARM Cortex A8 CPU
core-based tablets and smartphones. The existing
literature shows that although classical cryptanalysis
has been effective on reduced rounds of Salsa20, the
stream cipher is immune to software side-channel
attacks such as branch timing and cache timing attacks.
To the best of our knowledge, this work is the first to
perform hardware power analysis attacks, where we
evaluate the resistance of all eight keywords in the
proposed compact implementation of Salsa20. Our
technique targets the three subrounds of the first
round of the implemented Salsa20. The correlation power
analysis (CPA) attack has an attack complexity of
2$^{19}$. Based on extensive experiments on a compact
implementation of Salsa20, we demonstrate that all
these keywords can be recovered within 20,000 queries
on Salsa20. The attacks show a varying resilience of
the key words against CPA that has not yet been
observed in any stream or block cipher in the present
literature. This makes the architecture of this stream
cipher interesting from the side-channel analysis
perspective. Also, we propose a lightweight
countermeasure that mitigates the leakage in the power
traces as shown in the results of Welch's $t$-test
statistics. The hardware area overhead of the proposed
countermeasure is only 14\% and is designed with
compact implementation in mind.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chakraborty:2016:PDM,
author = "Prasenjit Chakraborty and Preeti Ranjan Panda and
Sandeep Sen",
title = "Partitioning and Data Mapping in Reconfigurable Cache
and Scratchpad Memory-Based Architectures",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "12:1--12:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2934680",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Scratchpad memory (SPM) is considered a useful
component in the memory hierarchy, solely or along with
caches, for meeting the power and energy constraints as
performance ceases to be the sole criteria for
processor design. Although the efficiency of SPM is
well known, its use has been restricted owing to
difficulties in programmability. Real applications
usually have regions that are amenable to exploitation
by either SPM or cache and hence can benefit if the two
are used in conjunction. Dynamically adjusting the
local memory resources to suit application demand can
significantly improve the efficiency of the overall
system. In this article, we propose a compiler
technique to map application data objects to the
SPM-cache and also partition the local memory between
the SPM and cache depending on the dynamic requirement
of the application. First, we introduce a novel
graph-based structure to tackle data allocation in an
application. Second, we use this to present a data
allocation heuristic to map program objects for a
fixed-size SPM-cache hybrid system that targets whole
program optimization. We finally extend this
formulation to adapt the SPM and cache sizes, as well
as the data allocation as per the requirement of
different application regions. We study the
applicability of the technique on various workloads
targeted at both SPM-only and hardware reconfigurable
memory systems, observing an average of 18\%
energy-delay improvement over state-of-the-art
techniques.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mehri:2016:GAB,
author = "Hossein Mehri and Bijan Alizadeh",
title = "Genetic-Algorithm-Based {FPGA} Architectural
Exploration Using Analytical Models",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "13:1--13:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2939372",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "FPGA architectural optimization has emerged as one of
the most important digital design challenges. In recent
years, experimental methods have been replaced by
analytical ones to find the optimized architecture.
Time is the main reason for this replacement.
Conventional Geometric Programming (GP) is a routine
framework to solve analytical models, including area,
delay, and power models. In this article, we discuss
the application of the Genetic Algorithm (GA) to the
design of FPGA architectures. The performance model has
been integrated into the Genetic Algorithm framework in
order to investigate the impact of various
architectural parameters on the performance efficiency
of FPGAs. This way, we are able to rapidly analyze FPGA
architectures and select the best one. The main
advantages of using GA versus GP are concurrency and
speed. The results show that concurrent optimization of
high-level architecture parameters, including lookup
table size (K) and cluster size (N), and low-level
parameters, like scaling of transistors, is possible
for GA, whereas GP does not capture K and N under its
concurrency and it needs to exhaustively search all
possible combinations of K and N. The results also show
that more than two orders of magnitude in runtime
improvement in comparison with GP-based analysis is
achieved.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gingade:2016:HPM,
author = "Ganesh Gingade and Wenyi Chen and Yung-Hsiang Lu and
Jan Allebach and Hernan Ildefonso Gutierrez-Vazquez",
title = "Hybrid Power Management for Office Equipment",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "14:1--14:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2910582",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Office machines (such as printers, scanners, facsimile
machines, and copiers) can consume significant amounts
of power. Most office machines have sleep modes to save
power. Power management of these machines is usually
timeout-based: a machine sleeps after being idle long
enough. Setting the time-out duration can be difficult:
if it is too long, the machine wastes power during
idleness. If it is too short, the machine sleeps too
soon and too often---the wake-up delay can significantly
degrade productivity. Thus, power management is a
tradeoff between saving energy and keeping response
time short. Many power management policies have been
published and one policy may outperform another in some
scenarios. There is no definite conclusion regarding
which policy is always better. This article describes
two methods for office equipment power management. The
first method adaptively reduces power based on a
constraint of the wake-up delay. The second is a hybrid
method with multiple candidate policies and it selects
the most appropriate power management policy. Using 6
months of request traces from 18 different printers, we
demonstrate that the hybrid policy outperforms
individual policies. We also discover that power
management based on business hours does not produce
consistent energy savings.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Katoen:2016:PMC,
author = "Joost-Pieter Katoen and Hao Wu",
title = "Probabilistic Model Checking for Uncertain
Scenario-Aware Data Flow",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "15:1--15:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2914788",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The Scenario-Aware Dataflow (SADF) model is based on
concurrent actors that interact via channels. It
combines streaming data and control to capture
scenarios while incorporating hard and soft real-time
aspects. To model data-flow computations that are
subject to uncertainty, SADF models are equipped with
random primitives. We propose to use probabilistic
model checking to analyze uncertain SADF models. We
show how measures such as expected time, long-run
objectives like throughput, as well as timed
reachability---can a given system configuration be
reached within a deadline with high probability?---can be
automatically determined. The crux of our method is a
compositional semantics of SADF with exponential agent
execution times combined with automated abstraction
techniques akin to partial-order reduction. We present
the semantics in detail and show how it accommodates
the incorporation of execution platforms, enabling the
analysis of energy consumption. The feasibility of our
approach is illustrated by analyzing several
quantitative measures of an MPEG-4 decoder and an
industrial face recognition application.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2016:DAE,
author = "Qixiao Liu and Miquel Moreto and Jaume Abella and
Francisco J. Cazorla and Mateo Valero",
title = "{DReAM}: an Approach to Estimate per-Task {DRAM}
Energy in Multicore Systems",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "16:1--16:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2939370",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Accurate per-task energy estimation in multicore
systems would allow performing per-task energy-aware
task scheduling and energy-aware billing in data
centers, among other applications. Per-task energy
estimation is challenged by the interaction between
tasks in shared resources, which impacts tasks' energy
consumption in uncontrolled ways. Some accurate
mechanisms have been devised recently to estimate
per-task energy consumed on-chip in multicores, but
there is a lack of such mechanisms for DRAM memories.
This article makes the case for accurate per-task DRAM
energy metering in multicores, which opens new paths to
energy/performance optimizations. In particular, the
contributions of this article are (i) an ideal per-task
energy metering model for DRAM memories; (ii) DReAM, an
accurate yet low cost implementation of the ideal model
(less than 5\% accuracy error when 16 tasks share
memory); and (iii) a comparison with standard methods
(even distribution and access-count based) proving that
DReAM is much more accurate than these other methods.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Somashekar:2016:NEG,
author = "Ahish Mysore Somashekar and Spyros Tragoudas and
Rathish Jayabharathi and Sreenivas Gangadhar",
title = "Non-enumerative Generation of Path Delay Distributions
and Its Application to Critical Path Selection",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "17:1--17:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2940327",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A Monte Carlo-based approach is proposed capable of
identifying in a non-enumerative and scalable manner
the distributions that describe the delay of every path
in a combinational circuit. Furthermore, a scalable
approach to select critical paths from a potentially
exponential number of path candidates is presented.
Paths and their delay distributions are stored in Zero
Suppressed Binary Decision Diagrams. Experimental
results on some of the largest ISCAS-89 and ITC-99
benchmarks show that the proposed method is highly
scalable and effective.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2016:ADB,
author = "Yi Wang and Zhiwei Qin and Renhai Chen and Zili Shao
and Laurence T. Yang",
title = "An Adaptive Demand-Based Caching Mechanism for {NAND}
Flash Memory Storage Systems",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "18:1--18:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2947658",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "During past decades, the capacity of NAND flash memory
has been increasing dramatically, leading to the use of
nonvolatile flash in the system's memory hierarchy. The
increasing capacity of NAND flash memory introduces a
large RAM footprint to store the logical to physical
address mapping. The demand-based approach can
effectively reduce and well control the RAM footprint.
However, extra address translation overhead is also
introduced which may degrade the system performance. In
this article, we present CDFTL, an adaptive Caching
mechanism for Demand-based Flash Translation Layer, for
NAND flash memory storage systems. CDFTL adopts both
the fine-grained entry-based caching mechanism to
exploit temporal locality and the coarse-grained
translation-page-based caching mechanism to exploit
spatial locality of workloads. By selectively caching
the on-demand address mappings and adaptively changing
the space configurations of two granularities, CDFTL
can effectively utilize the RAM space and improve the
cache hit ratio. We evaluate CDFTL under a real
hardware embedded platform using a variety of I/O
traces. Experimental results show that our technique
can achieve an 11.13\% reduction in average system
response time and a 35.21\% reduction in translation
block erase counts compared with the previous work.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Nair:2016:ESP,
author = "Piyoosh Purushothaman Nair and Arnab Sarkar and N. M.
Harsha and Megha Gandhi and P. P. Chakrabarti and Sujoy
Ghose",
title = "{ERfair} Scheduler with Processor Suspension for
Real-Time Multiprocessor Embedded Systems",
journal = j-TODAES,
volume = "22",
number = "1",
pages = "19:1--19:??",
month = dec,
year = "2016",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2948979",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:29 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Proportional fair schedulers with their ability to
provide optimal schedulability along with hard
timeliness and quality-of-service guarantees on
multiprocessors form an attractive alternative in
real-time embedded systems that concurrently run a mix
of independent applications with varying timeliness
constraints. This article presents ERfair Scheduler
with Suspension on Multiprocessors (ESSM), an
efficient, optimal proportional fair scheduler that
attempts to reduce system wide energy consumption by
locally maximizing the processor suspension intervals
while not sacrificing the ERfairness timing constraints
of the system. The proposed technique takes advantage
of higher execution rates of tasks in underloaded
ERfair systems and uses a procrastination scheme to
search for time points within the schedule where
suspension intervals are locally maximal. Evaluation
results reveal that ESSM achieves good sleep efficiency
and provides up to 50\% higher effective total sleep
durations as compared to the Basic-ERfair scheduler on
systems consisting of 2 to 20 processors.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Nguyen:2017:SAA,
author = "Phuong Ha Nguyen and Durga Prasad Sahoo and Rajat
Subhra Chakraborty and Debdeep Mukhopadhyay",
title = "Security Analysis of Arbiter {PUF} and Its Lightweight
Compositions Under Predictability Test",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "20:1--20:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2940326",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Unpredictability is an important security property of
Physically Unclonable Function (PUF) in the context of
statistical attacks, where the correlation between
challenge-response pairs is explicitly exploited. In
the existing literature on PUFs, the Hamming Distance
Test, denoted by HDT(t), was proposed to evaluate the
unpredictability of PUFs, which is a simplified case of
the Propagation Criterion test PC(t). The objective of
these test schemes is to estimate the output transition
probability when there are t or fewer than t bits
flips, and ideally this probability value should be
0.5. In this work, we show that aforementioned two test
schemes are not enough to ensure the unpredictability
of a PUF design. We propose a new test, which is
denoted as HDT(e, t). This test scheme is a fine-tuned
version of the previous schemes, as it considers the
flipping bit pattern vector e along with parameter t.
As a contribution, we provide a comprehensive
discussion and analytic interpretation of HDT(t),
PC(t), and HDT(e, t) test schemes for Arbiter PUF
(APUF), Exclusive-OR (XOR) PUF, and Lightweight Secure
PUF (LSPUF). Our analysis establishes that HDT(e, t)
test is more general in comparison with HDT(t) and
PC(t) tests. In addition, we demonstrate a few
scenarios where the adversary can exploit the
information obtained from the analysis of HDT(e, t)
properties of APUF, XOR PUF, and LSPUF to develop
statistical attacks on them, if the ideal value of
HDT(e, t) = 0.5 is not achieved for a given PUF. We
validate our theoretical observations using the
simulated and Field Programmable Gate Array (FPGA)
implemented APUF, XOR PUF, and LSPUF designs.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhu:2017:CCA,
author = "Di Zhu and Siyu Yue and Massoud Pedram and Lizhong
Chen",
title = "{CALM}: Contention-Aware Latency-Minimal Application
Mapping for Flattened Butterfly On-Chip Networks",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "21:1--21:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2950045",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the emergence of many-core multiprocessor
system-on-chips (MPSoCs), on-chip networks are facing
serious challenges in providing fast communication
among various tasks and cores. One promising on-chip
network design approach shown in recent studies is to
add express channels to traditional mesh network as
shortcuts to bypass intermediate routers, thereby
reducing packet latency. This approach not only changes
the packet latency models, but also greatly affects
network traffic behaviors, both of which have not been
fully exploited in existing mapping algorithms. In this
article, we explore the opportunities in optimizing
application mapping for flattened butterfly, a popular
express channel-based on-chip network. Specifically, we
identify the unique characteristics of flattened
butterfly, analyze the opportunities and new
challenges, and propose an efficient heuristic mapping
algorithm. The proposed algorithm Contention-Aware
Latency Minimal (CALM) is able to reduce unnecessary
turns that would otherwise impose additional router
pipeline latency to packets, as well as adjust
forwarding traffic to reduce network contention
latency. Simulation results show that the proposed
algorithm can achieve, on average, 3.4X reduction in
the number of turns, 24.8\% reduction in contention
latency, and 14.12\% reduction in the overall packet
latency.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Azarbad:2017:SSB,
author = "Mohammad Reza Azarbad and Bijan Alizadeh",
title = "Scalable {SMT-Based} Equivalence Checking of Nested
Loop Pipelining in Behavioral Synthesis",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "22:1--22:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2953879",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we present a novel methodology based
on SMT-solvers to verify equality of a high-level
described specification and a pipelined RTL
implementation produced by a high-level synthesis tool.
The complex transformations existing in the high-level
synthesis process, such as nested loop pipelining,
cause the conventional methods of equivalence checking
to be inefficient. The proposed equivalence checking
method simultaneously attacks the two problems in this
context: (1) state space explosion and (2) complex
high-level synthesis transformations. To show the
scalability and efficiency of the proposed method, the
verification results of large designs are compared with
those of the SAT-based method, including three
different state-of-the-art SAT-solvers: the SMT-based
procedure, the modular Horner expansion diagram
(M-HED)-based method, and the M-HED partitioning
approach. The results show 2470$ \times $, 2540$ \times
$, and 142$ \times $ average memory usage reduction and
252$ \times $, 28$ \times $, and 914$ \times $ speedup
in comparison with M-HED, M-HED partitioning, and
SMT-solver without using the proposed method,
respectively.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhao:2017:OIM,
author = "Qingling Zhao and Zaid Al-Bayati and Zonghua Gu and
Haibo Zeng",
title = "Optimized Implementation of Multirate
Mixed-Criticality Synchronous Reactive Models",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "23:1--23:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2968445",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Model-based design using Synchronous Reactive (SR)
models enables early design and verification of
application functionality in a platform-independent
manner, and the implementation on the target platform
should guarantee the preservation of application
semantic properties. Mixed-Criticality Scheduling (MCS)
is an effective approach to addressing diverse
certification requirements of safety-critical systems
that integrate multiple subsystems with different
levels of criticality. This article considers
fixed-priority scheduling of mixed-criticality SR
models, and considers two scheduling approaches:
Adaptive MCS and Elastic MCS. We formulate the
optimization problem of minimizing the total system
cost of added functional delays in the implementation
while guaranteeing schedulability, and present an
optimal algorithm based on branch-and-bound search, and
an efficient heuristic algorithm.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ali:2017:RCD,
author = "Hazem Ismail Ali and Sander Stuijk and Benny Akesson
and Lu{\'\i}s Miguel Pinho",
title = "Reducing the Complexity of Dataflow Graphs Using
Slack-Based Merging",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "24:1--24:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2956232",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "There exist many dataflow applications with timing
constraints that require real-time guarantees on safe
execution without violating their deadlines. Extraction
of timing parameters (offsets, deadlines, periods) from
these applications enables the use of real-time
scheduling and analysis techniques, and provides
guarantees on satisfying timing constraints. However,
existing extraction techniques require the
transformation of the dataflow application from highly
expressive dataflow computational models, for example,
Synchronous Dataflow (SDF) and Cyclo-Static Dataflow
(CSDF) to Homogeneous Synchronous Dataflow (HSDF). This
transformation can lead to an exponential increase in
the size of the application graph that significantly
increases the runtime of the analysis. In this article,
we address this problem by proposing an offline
heuristic algorithm called slack-based merging. The
algorithm is a novel graph reduction technique that
helps in speeding up the process of timing parameter
extraction and finding a feasible real-time schedule,
thereby reducing the overall design time of the
real-time system. It uses two main concepts: (a) the
difference between the worst-case execution time of the
SDF graph's firings and its timing constraints (slack)
to merge firings together and generate a reduced-size
HSDF graph, and (b) the novel concept of merging called
safe merge, which is a merge operation that we formally
prove cannot cause a live HSDF graph to deadlock. The
results show that the reduced graph (1) respects the
throughput and latency constraints of the original
application graph and (2) typically speeds up the
process of extracting timing parameters and finding a
feasible real-time schedule for real-time dataflow
applications. They also show that when the throughput
constraint is relaxed with respect to the maximal
throughput of the graph, the merging algorithm is able
to achieve a larger reduction in graph size, which in
turn results in a larger speedup of the real-time
scheduling algorithms.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Mundhenk:2017:SAN,
author = "Philipp Mundhenk and Andrew Paverd and Artur Mrowca
and Sebastian Steinhorst and Martin Lukasiewycz and
Suhaib A. Fahmy and Samarjit Chakraborty",
title = "Security in Automotive Networks: Lightweight
Authentication and Authorization",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "25:1--25:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2960407",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "With the increasing amount of interconnections between
vehicles, the attack surface of internal vehicle
networks is rising steeply. Although these networks are
shielded against external attacks, they often do not
have any internal security to protect against malicious
components or adversaries who can breach the network
perimeter. To secure the in-vehicle network, all
communicating components must be authenticated, and
only authorized components should be allowed to send
and receive messages. This is achieved through the use
of an authentication framework. Cryptography is widely
used to authenticate communicating parties and provide
secure communication channels (e.g., Internet
communication). However, the real-time performance
requirements of in-vehicle networks restrict the types
of cryptographic algorithms and protocols that may be
used. In particular, asymmetric cryptography is
computationally infeasible during vehicle operation. In
this work, we address the challenges of designing
authentication protocols for automotive systems. We
present Lightweight Authentication for Secure
Automotive Networks (LASAN), a full lifecycle
authentication approach. We describe the core LASAN
protocols and show how they protect the internal
vehicle network while complying with the real-time
constraints and low computational resources of this
domain. By leveraging the fixed structure of automotive
networks, we minimize bandwidth and computation
requirements. Unlike previous work, we also explain how
this framework can be integrated into all aspects of
the automotive product lifecycle, including
manufacturing, vehicle maintenance, and software
updates. We evaluate LASAN in two different ways:
First, we analyze the security properties of the
protocols using established protocol verification
techniques based on formal methods. Second, we evaluate
the timing requirements of LASAN and compare these to
other frameworks using a new highly modular discrete
event simulator for in-vehicle networks, which we have
developed for this evaluation.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhang:2017:RTV,
author = "Xianwei Zhang and Youtao Zhang and Bruce R. Childers
and Jun Yang",
title = "On the Restore Time Variations of Future {DRAM}
Memory",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "26:1--26:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2967609",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As the de facto main memory standard, DRAM (Dynamic
Random Access Memory) has achieved dramatic density
improvement in the past four decades, along with the
advancements in process technology. Recent studies
reveal that one of the major challenges in scaling DRAM
into the deep sub-micron regime is its significant
variations on cell restore time, which affect timing
constraints such as write recovery time. Adopting
traditional approaches results in either low yield rate
or large performance degradation. In this article, we
propose schemes to expose the variations to the
architectural level. By constructing memory chunks with
different access speeds and, in particular, exploiting
the performance benefits of fast chunks, a
variation-aware memory controller can effectively
mitigate the performance loss due to relaxed timing
constraints. We then proposed restore-time-aware rank
construction and page allocation schemes to make better
use of fast chunks. Our experimental results show that,
compared to traditional designs such as row sparing and
Error Correcting Codes, the proposed schemes help to
improve system performance by about 16\% and 20\%,
respectively, for 20nm and 14nm technology nodes on a
four-core multiprocessor system.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2017:HDP,
author = "Ye-Jyun Lin and Chia-Lin Yang and Hsiang-Pang Li and
Cheng-Yuan Michael Wang",
title = "A Hybrid {DRAM\slash PCM} Buffer Cache Architecture
for {Smartphones} with {QoS} Consideration",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "27:1--27:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2979143",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Flash memory is widely used in mobile phones to store
contact information, application files, and other types
of data. In an operating system, the buffer cache keeps
the I/O blocks in dynamic random access memory (DRAM)
to reduce the slow flash accesses. However, in
smartphones, we observed two issues which reduce the
benefits of the buffer cache. First, a large number of
synchronous writes force writing the data from the
buffer cache to flash frequently. Second, the large
amount of I/O accesses from background applications
diminishes the buffer cache efficiency of the
foreground application, which degrades the
quality-of-service (QoS). In this article, we propose a
buffer cache architecture with hybrid DRAM and phase
change memory (PCM) memory, which improves the I/O
performance and QoS for smartphones. We use a DRAM
first-level buffer cache to provide high buffer cache
performance and a PCM last-level buffer cache to reduce
the impact of frequent synchronous writes. Based on the
proposed hierarchical buffer cache architecture, we
propose a sub-block management and background flush to
reduce the impact of the PCM write limitation and the
dirty block write-back overhead, respectively. To
improve the QoS, we propose a least-recently-activated
first replacement policy (LRA) to keep the data from
the applications that are most likely to become the
foreground one. The experimental results show that with
the proposed mechanisms, our hierarchical buffer cache
can improve the I/O response time by 20\% compared to
the conventional buffer cache. The proposed LRA can
improve the foreground application performance by 1.74x
compared to the conventional CLOCK policy.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Su:2017:EMC,
author = "Hang Su and Dakai Zhu and Scott Brandt",
title = "An Elastic Mixed-Criticality Task Model and
Early-Release {EDF} Scheduling Algorithms",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "28:1--28:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2984633",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Many algorithms have recently been studied for
scheduling mixed-criticality (MC) tasks. However, most
existing MC scheduling algorithms guarantee the timely
executions of high-criticality (HC) tasks at the
expense of discarding low-criticality (LC) tasks, which
can cause serious service interruption for such tasks.
In this work, aiming at providing guaranteed services
for LC tasks, we study an elastic mixed-criticality
(E-MC) task model for dual-criticality systems.
Specifically, the model allows each LC task to specify
its maximum period (i.e., minimum service level) and a
set of early-release points. We propose an
early-release (ER) mechanism that enables LC tasks to
be released more frequently and thus improve their
service levels at runtime, with both conservative and
aggressive approaches to exploiting system slack being
considered, which is applied to both earliest deadline
first (EDF) and preference-oriented earliest-deadline
schedulers. We formally prove the correctness of the
proposed early-release--earliest deadline first
scheduler on guaranteeing the timeliness of all tasks
through judicious management of the early releases of
LC tasks. The proposed model and schedulers are
evaluated through extensive simulations. The results
show that by moderately relaxing the service
requirements of LC tasks in MC task sets (i.e., by
having LC tasks' maximum periods in the E-MC model be
two to three times their desired MC periods), most
transformed E-MC task sets can be successfully
scheduled without sacrificing the timeliness of HC
tasks. Moreover, with the proposed ER mechanism, the
runtime performance of tasks (e.g., execution
frequencies of LC tasks, response times, and jitters of
HC tasks) can be significantly improved under the ER
schedulers when compared to that of the
state-of-the-art earliest deadline first-virtual
deadline scheduler.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2017:CSL,
author = "Irith Pomeranz",
title = "Computation of Seeds for {LFSR}-Based $n$-Detection
Test Generation",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "29:1--29:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2994144",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/prng.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article describes a new procedure that generates
seeds for LFSR-based test generation when the goal is
to produce an $n$-detection test set. The procedure
does not use test cubes in order to avoid the situation
where a seed does not exist for a given test cube with
a given LFSR. Instead, the procedure starts from a set
of seeds that produces a one-detection test set. It
modifies seeds to obtain new seeds such that the tests
they produce increase the numbers of detections of
target faults. The modification procedure also
increases the number of faults that each additional
seed detects. Experimental results are presented to
demonstrate the effectiveness of the procedure.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hankendi:2017:SCS,
author = "Can Hankendi and Ayse Kivilcim Coskun",
title = "Scale \& Cap: Scaling-Aware Resource Management for
Consolidated Multi-threaded Applications",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "30:1--30:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2994145",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "As the number of cores per server node increases,
designing multi-threaded applications has become
essential to efficiently utilize the available hardware
parallelism. Many application domains have started to
adopt multi-threaded programming; thus, efficient
management of multi-threaded applications has become a
significant research problem. Efficient execution of
multi-threaded workloads on cloud environments, where
applications are often consolidated by means of
virtualization, relies on understanding the
multi-threaded specific characteristics of the
applications. Furthermore, energy cost and power
delivery limitations require data center server nodes
to work under power caps, which bring additional
challenges to runtime management of consolidated
multi-threaded applications. This article proposes a
dynamic resource allocation technique for consolidated
multi-threaded applications for power-constrained
environments. Our technique takes into account
application characteristics specific to multi-threaded
applications, such as power and performance scaling, to
make resource distribution decisions at runtime to
improve the overall performance, while accurately
tracking dynamic power caps. We implement and evaluate
our technique on state-of-the-art servers and show that
the proposed technique improves the application
performance by up to 21\% under power caps compared to
a default resource manager.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Backer:2017:SFT,
author = "Jerry Backer and David Hely and Ramesh Karri",
title = "Secure and Flexible Trace-Based Debugging of
Systems-on-Chip",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "31:1--31:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2994601",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This work tackles the conflict between enforcing
security of a system-on-chip (SoC) and providing
observability during trace-based debugging. On one
hand, security objectives require that assets remain
confidential at different stages of the SoC life cycle.
On the other hand, the trace-based debug infrastructure
exposes values of internal signals that can leak the
assets to untrusted third parties. We propose a secure
trace-based debug infrastructure to resolve this
conflict. The secure infrastructure tags each asset to
identify its owner (to whom it can be exposed during
debug) and nonintrusively enforces the confidentiality
of the assets during runtime debug. We implement a
prototype of the enhanced infrastructure on an FPGA to
validate its functional correctness. ASIC estimations
show that our approach incurs practical area and power
costs.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Latifis:2017:MVC,
author = "Ioannis Latifis and Karthick Parashar and Grigoris
Dimitroulakos and Hans Cappelle and Christakis Lezos
and Konstantinos Masselos and Francky Catthoor",
title = "A {MATLAB} Vectorizing Compiler Targeting
Application-Specific Instruction Set Processors",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "32:1--32:28",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2996182",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/matlab.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This article discusses a MATLAB-to-C vectorizing
compiler that exploits custom instructions, for
example, for Single Instruction Multiple Data (SIMD)
processing and instructions for complex arithmetic
present in Application-Specific Instruction Set
Processors (ASIPs). Custom instructions are represented
via specialized intrinsic functions in the generated
code, and the generated code can be used as input to
any C/C++ compiler supporting the target processor.
Furthermore, the specialized instruction set of the
target processor is described in a parameterized way
using a target processor-independent architecture
description approach, thus allowing the support of any
processor. The compiler has been used for the
generation of application code for two different ASIPs
for several benchmarks. The code generated by the
compiler achieves a speedup between 2$ \times $--74$
\times $ and 2$ \times $--97$ \times $ compared to the
code generated by the MathWorks MATLAB-to-C compiler.
Experimental results also prove that the compiler
efficiently exploits SIMD custom instructions achieving
a 3.3 factor speedup compared to cases where no SIMD
processing is used. Thus the compiler can be employed
to reduce the development time/effort/cost and time to
market through raising the abstraction of application
design in an embedded systems/system-on-chip
development context.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Santos:2017:SMH,
author = "Rui Santos and Shyamsundar Venkataraman and Akash
Kumar",
title = "Scrubbing Mechanism for Heterogeneous Applications in
Reconfigurable Devices",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "33:1--33:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2997646",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Commercial off-the-shelf (COTS) reconfigurable devices
have been recognized as one of the most suitable
processing devices to be applied in nano-satellites,
since they can satisfy and combine their most important
requirements, namely processing performance,
reconfigurability, and low cost. However, COTS
reconfigurable devices, in particular Static-RAM Field
Programmable Gate Arrays, can be affected by cosmic
radiation, compromising the overall nano-satellite
reliability. Scrubbing has been proposed as a mechanism
to repair faults in configuration memory. However, the
current scrubbing mechanisms are predominantly static,
unable to adapt to heterogeneous applications and their
runtime variations. In this article, a dynamically
adaptive scrubbing mechanism is proposed. Through a
window-based scrubbing scheduling, this mechanism
adapts the scrubbing process to heterogeneous
applications (composed of periodic/sporadic and
streaming/DSP (Digital Signal Processing) tasks), as
well as their reconfigurations and modifications at
runtime. Conducted simulation experiments show the
feasibility and the efficiency of the proposed solution
in terms of system reliability metric and memory
overhead.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Enrici:2017:MDE,
author = "Andrea Enrici and Ludovic Apvrille and Renaud
Pacalet",
title = "A Model-Driven Engineering Methodology to Design
Parallel and Distributed Embedded Systems",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "34:1--34:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2999537",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In Model-Driven Engineering system-level approaches,
the design of communication protocols and patterns is
subject to the design of processing operations
(computations) and to their mapping onto execution
resources. However, this strategy allows us to capture
simple communication schemes (e.g.,
processor-bus-memory) and prevents us from evaluating
the performance of both computations and communications
(e.g., impact of application traffic patterns onto the
communication interconnect) in a single step. To solve
these issues, we introduce a novel design approach---the
$ \Psi $-chart---where we design communication patterns
and protocols independently of a system's functionality
and resources, via dedicated models. At the mapping
step, both application and communication models are
bound to the platform resources and transformed to
explore design alternatives for both computations and
communications. We present the $ \Psi $-chart and its
implementation (i.e., communication models and Design
Space Exploration) in TTool/DIPLODOCUS, a Unified
Modeling Language (UML)/SysML framework for the
modeling, simulation, formal verification and automatic
code generation of data-flow embedded systems. The
effectiveness of our solution in terms of better design
quality (e.g., portability, time) is demonstrated with
the design of the physical layer of a ZigBee (IEEE
802.15.4) transmitter onto a multi-processor
architecture.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Basten:2017:SSI,
author = "Twan Basten and Orlando Moreira and Robert de Groote",
title = "Special Section: Integrating Dataflow, Embedded
Computing and Architecture",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "35:1--35:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3023455",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Choi:2017:WCR,
author = "Junchul Choi and Soonhoi Ha",
title = "Worst-Case Response Time Analysis of a Synchronous
Dataflow Graph in a Multiprocessor System with
Real-Time Tasks",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "36:1--36:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2997644",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we propose a novel technique that
estimates a tight upper bound of the worst-case
response time (WCRT) of a synchronous dataflow (SDF)
graph when the SDF graph shares processors with other
real-time tasks. When an SDF graph is executed at
runtime under a self-timed or static assignment
scheduling policy on a multi-processor system, static
scheduling of the SDF graph does not guarantee the
satisfaction of latency constraints since changes to
the schedule may result in timing anomalies. To
estimate the WCRT of an SDF graph with a given mapping
and scheduling result, we first construct a task
instance dependency graph that depicts the dependency
between node executions in a static schedule. The
proposed technique combines two techniques in a novel
way: schedule time bound analysis and response time
analysis. The former is used to consider the
interference between task instances in the same SDF
graph, and the latter is used to consider the
interference from other real-time tasks. Through
extensive experiments with synthetic examples and
benchmarks, we verify the superior performance of the
proposed technique compared to other existent
techniques.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jung:2017:MSM,
author = "Hanwoong Jung and Hyunok Oh and Soonhoi Ha",
title = "Multiprocessor Scheduling of a Multi-Mode Dataflow
Graph Considering Mode Transition Delay",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "37:1--37:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2997645",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The Synchronous Data Flow (SDF) model is widely used
for specifying signal processing or streaming
applications. Since modern embedded applications become
more complex with dynamic behavior changes at runtime,
several extensions of the SDF model have been proposed
to specify the dynamic behavior changes while
preserving static analyzability of the SDF model. They
assume that an application has a finite number of
behaviors (or modes), and each behavior (mode) is
represented by an SDF graph. They are classified as
multi-mode dataflow models in this article. While there
exist several scheduling techniques for multi-mode
dataflow models, no one allows task migration between
modes. By observing that the resource requirement can
be additionally reduced if task migration is allowed,
we propose a multiprocessor scheduling technique of a
multi-mode dataflow graph considering task migration
between modes. Based on a genetic algorithm, the
proposed technique schedules all SDF graphs in all
modes simultaneously to minimize the resource
requirement. To satisfy the throughput constraint, the
proposed technique calculates the actual throughput
requirement of each mode and the output buffer size for
tolerating throughput jitter. We compare the proposed
technique with a method that analyzes SDF graphs in
each execution mode separately, a method that does not
allow task migration, and a method that does not allow
mode-overlapped schedule for synthetic examples and
five real applications: H.264 decoder, lane detection,
vocoder, MP3 decoder, and printer pipeline.",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bouakaz:2017:SPD,
author = "Adnan Bouakaz and Pascal Fradet and Alain Girault",
title = "A Survey of Parametric Dataflow Models of
Computation",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "38:1--38:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2999539",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Dataflow models of computation (MoCs) are widely used
to design embedded signal processing and streaming
systems. Dozens of dataflow MoCs have been proposed in
the past few decades. More recently, several parametric
dataflow MoCs have been presented as an interesting
tradeoff between analyzability and expressiveness. They
offer a controlled form of dynamism under the form of
parameters (e.g., parametric rates), along with runtime
parameter configuration. This survey provides a
comprehensive description of the existing parametric
dataflow MoCs (constructs, constraints, properties,
static analyses) and compares them using a common
example. The main objectives are to help designers of
streaming applications choose the most suitable model
for their needs and pave the way for the design of new
parametric MoCs.",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bouakaz:2017:SAD,
author = "Adnan Bouakaz and Pascal Fradet and Alain Girault",
title = "Symbolic Analyses of Dataflow Graphs",
journal = j-TODAES,
volume = "22",
number = "2",
pages = "39:1--39:??",
month = mar,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3007898",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The synchronous dataflow model of computation is
widely used to design embedded stream-processing
applications under strict quality-of-service
requirements (e.g., buffering size, throughput,
input-output latency). The required analyses can either
be performed at compile time (for design space
exploration) or at runtime (for resource management and
reconfigurable systems). However, these analyses have
an exponential time complexity, which may cause a huge
runtime overhead or make design space exploration
unacceptably slow. In this article, we argue that
symbolic analyses are more appropriate since they
express the system performance as a function of
parameters (i.e., input and output rates, execution
times). Such functions can be quickly evaluated for
each different configuration or checked with respect to
different quality-of-service requirements. We provide
symbolic analyses for computing the maximal throughput
of acyclic synchronous dataflow graphs, the minimum
required buffers for which as soon as possible (ASAP)
scheduling achieves this throughput, and finally, the
corresponding input-output latency of the graph. The
article first investigates these problems for a single
parametric edge. The results are extended to general
acyclic graphs using linear approximation techniques.
We assess the proposed analyses experimentally on both
synthetic and real benchmarks.",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Park:2017:HHC,
author = "Jaehyun Park and Seungcheol Baek and Hyung Gyu Lee and
Chrysostomos Nicopoulos and Vinson Young and Junghee
Lee and Jongman Kim",
title = "{HoPE}: Hot-Cacheline Prediction for Dynamic Early
Decompression in Compressed {LLCs}",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "40:1--40:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2999538",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Data compression plays a pivotal role in improving
system performance and reducing energy consumption,
because it increases the logical effective capacity of
a compressed memory system without physically
increasing the memory size. However, data compression
techniques incur some cost, such as non-negligible
compression and decompression overhead. This overhead
becomes more severe if compression is used in the
cache. In this article, we aim to minimize the read-hit
decompression penalty in compressed Last-Level Caches
(LLCs) by speculatively decompressing frequently used
cachelines. To this end, we propose a Hot-cacheline
Prediction and Early decompression (HoPE) mechanism
that consists of three synergistic techniques:
Hot-cacheline Prediction (HP), Early Decompression
(ED), and Hit-history-based Insertion (HBI). HP and HBI
efficiently identify the hot compressed cachelines,
while ED selectively decompresses hot cachelines, based
on their size information. Unlike previous approaches,
the HoPE framework considers the performance
balance/tradeoff between the increased effective cache
capacity and the decompression penalty. To evaluate the
effectiveness of the proposed HoPE mechanism, we run
extensive simulations on memory traces obtained from
multi-threaded benchmarks running on a full-system
simulation framework. We observe significant
performance improvements over compressed cache schemes
employing the conventional Least-Recently Used (LRU)
replacement policy, the Dynamic Re-Reference Interval
Prediction (DRRIP) scheme, and the Effective Capacity
Maximizer (ECM) compressed cache management mechanism.
Specifically, HoPE exhibits system performance
improvements of approximately 11\%, on average, over
LRU, 8\% over DRRIP, and 7\% over ECM by reducing the
read-hit decompression penalty by around 65\%, over a
wide range of applications.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% TODAES volume 22, number 3 (May 2017), article 41.
@Article{Tang:2017:PPE,
  author = {Tang, Li and
    Barrett, Richard F. and
    Cook, Jeanine and
    Hu, X. Sharon},
  title = {{PeaPaw}: Performance and Energy-Aware Partitioning of
    Workload on Heterogeneous Platforms},
  journal = j-TODAES,
  volume = 22,
  number = 3,
  pages = {41:1--41:??},
  month = may,
  year = 2017,
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/2999540},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Performance and energy are two major concerns for
    application development on heterogeneous platforms. It
    is challenging for application developers to fully
    exploit the performance/energy potential of
    heterogeneous platforms. One reason is the lack of
    reliable prediction of the system's performance/energy
    before application implementation. Another reason is
    that a heterogeneous platform presents a large design
    space for workload partitioning between different
    processors. To reduce such development cost, this
    article proposes a framework, PeaPaw, to assist
    application developers to identify a workload partition
    (WP) that has high potential leading to high
    performance or energy efficiency before actual
    implementation. The PeaPaw framework includes both
    analytical performance/energy models and two sets of
    workload partitioning guidelines. Based on the design
    goal, application developers can obtain a workload
    partitioning guideline from PeaPaw for a given platform
    and follow it to design one or multiple WPs for a given
    workload. Then PeaPaw can be used to estimate the
    performance/energy of the designed WPs, and the WP with
    the best estimated performance/energy can be selected
    for actual implementation. To demonstrate the
    effectiveness of PeaPaw, we have conducted three case
    studies. Results from these case studies show that
    PeaPaw can faithfully estimate the performance/energy
    relationships of WPs and provide effective workload
    partitioning guidelines.},
  acknowledgement = ack-nhfb,
  articleno = {41},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES volume 22, number 3 (May 2017), article 42.
@Article{Yang:2017:CCS,
  author = {Yang, Kun and
    Forte, Domenic and
    Tehranipoor, Mark M.},
  title = {{CDTA}: a Comprehensive Solution for Counterfeit
    Detection, Traceability, and Authentication in the
    {IoT} Supply Chain},
  journal = j-TODAES,
  volume = 22,
  number = 3,
  pages = {42:1--42:??},
  month = may,
  year = 2017,
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3005346},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The Internet of Things (IoT) is transforming the way
    we live and work by increasing the connectedness of
    people and things on a scale that was once
    unimaginable. However, the vulnerabilities in the IoT
    supply chain have raised serious concerns about the
    security and trustworthiness of IoT devices and
    components within them. Testing for device provenance,
    detection of counterfeit integrated circuits (ICs) and
    systems, and traceability of IoT devices are
    challenging issues to address. In this article, we
    develop a novel radio-frequency identification
    (RFID)-based system suitable for counterfeit detection,
    traceability, and authentication in the IoT supply
    chain called CDTA. CDTA is composed of different types
    of on-chip sensors and in-system structures that
    collect necessary information to detect multiple
    counterfeit IC types (recycled, cloned, etc.), track
    and trace IoT devices, and verify the overall system
    authenticity. Central to CDTA is an RFID tag employed
    as storage and a channel to read the information from
    different types of chips on the printed circuit board
    (PCB) in both power-on and power-off scenarios. CDTA
    sensor data can also be sent to the remote server for
    authentication via an encrypted Ethernet channel when
    the IoT device is deployed in the field. A novel board
    ID generator is implemented by combining outputs of
    physical unclonable functions (PUFs) embedded in the
    RFID tag and different chips on the PCB. A light-weight
    RFID protocol is proposed to enable mutual
    authentication between RFID readers and tags. We also
    implement a secure interchip communication on the PCB.
    Simulations and experimental results using Spartan 3E
    FPGAs demonstrate the effectiveness of this system. The
    efficiency of the radio-frequency (RF) communication
    has also been verified via a PCB prototype with a
    printed slot antenna.},
  acknowledgement = ack-nhfb,
  articleno = {42},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES volume 22, number 3 (May 2017), article 43.
@Article{Pomeranz:2017:GTS,
  author = {Pomeranz, Irith},
  title = {Generation of Transparent-Scan Sequences for Diagnosis
    of Scan Chain Faults},
  journal = j-TODAES,
  volume = 22,
  number = 3,
  pages = {43:1--43:??},
  month = may,
  year = 2017,
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3007207},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Diagnosis of scan chain faults is important for yield
    learning and improvement. Procedures that generate
    tests for diagnosis of scan chain faults produce
    scan-based tests with one or more functional capture
    cycles between a scan-in and a scan-out operation. The
    approach to test generation referred to as
    transparent-scan has several advantages in this
    context. (1) It allows functional capture cycles and
    scan shift cycles to be interleaved arbitrarily. This
    increases the flexibility to assign to the scan cells
    values that are needed for diagnosis. (2) Test
    generation under transparent-scan considers a circuit
    model where the scan logic is included explicitly.
    Consequently, the test generation procedure takes into
    consideration the full effect of a scan chain fault. It
    thus produces accurate tests. (3) For the same reason,
    it can also target faults inside the scan logic. (4)
    Transparent-scan results in compact test sequences.
    Compaction is important because of the large volumes of
    fail data that scan chain faults create. The cost of
    transparent-scan is that it requires simulation
    procedures for sequential circuits, and that arbitrary
    sequences would be applicable to the scan select input.
    Motivated by the advantages of transparent-scan, and
    the importance of diagnosing scan chain faults, this
    article describes a procedure for generating
    transparent-scan sequences for diagnosis of scan chain
    faults. The procedure is also applied to produce
    transparent-scan sequences for diagnosis of faults
    inside the scan logic.},
  acknowledgement = ack-nhfb,
  articleno = {43},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES volume 22, number 3 (May 2017), article 44.
%%% The second author's compound surname is kept inside one brace group
%%% so that it is parsed as a single last name.
@Article{Vatanparvar:2017:ASR,
  author = {Vatanparvar, Korosh and
    {Al Faruque}, Mohammad Abdullah},
  title = {Application-Specific Residential Microgrid Design
    Methodology},
  journal = j-TODAES,
  volume = 22,
  number = 3,
  pages = {44:1--44:??},
  month = may,
  year = 2017,
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3007206},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {In power systems, the traditional, non-interactive,
    and manually controlled power grid has been transformed
    to a cyber-dominated smart grid. This cyber-physical
    integration has provided the smart grid with
    communication, monitoring, computation, and controlling
    capabilities to improve its reliability, energy
    efficiency, and flexibility. A microgrid is a localized
    and semi-autonomous group of smart energy systems that
    utilizes the above-mentioned capabilities to drive
    modern technologies such as electric vehicle charging,
    home energy management, and smart appliances. Design,
    upgrading, test, and verification of these microgrids
    can get too complicated to handle manually. The
    complexity is due to the wide range of solutions and
    components that are intended to address the microgrid
    problems. This article presents a novel Model-Based
    Design (MBD) methodology to model, co-simulate, design,
    and optimize microgrid and its multi-level controllers.
    This methodology helps in the design, optimization, and
    validation of a microgrid for a specific application.
    The application rules, requirements, and design-time
    constraints are met in the designed/optimized microgrid
    while the implementation cost is minimized. Based on
    our novel methodology, a design automation,
    co-simulation, and analysis tool, called GridMAT, is
    implemented. Our experiments have illustrated that
    implementing a hierarchical controller reduces the
    average power consumption by 8\% and shifts the peak
    load for cost saving. Moreover, optimizing the
    microgrid design using our MBD methodology considering
    smart controllers has decreased the total
    implementation cost. Compared to the conventional
    methodology, the cost decreases by 14\% and compared to
    the MBD methodology where smart controllers are not
    considered, it decreases by 5\%.},
  acknowledgement = ack-nhfb,
  articleno = {44},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES volume 22, number 3 (May 2017), article 45.
%%% Abstract markup: the bus count n and the constraint count m are
%%% mathematical variables and are set in math mode; the citation
%%% "et al." is followed by a control space so that TeX does not apply
%%% end-of-sentence spacing before the year.
@Article{Yan:2017:LAE,
author = "Jin-Tai Yan",
title = "Layer Assignment of Escape Buses with Consecutive
Constraints in {PCB} Designs",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "45:1--45:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3012010",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "It is important for cost and reliability consideration
to minimize the number of the used layers in a PCB
design. In this article, given a set of $n$ circular
escape buses with their escape directions between two
adjacent components and a set of $m$ consecutive
constraints on the escape buses, the problem of
assigning the given escape buses between two adjacent
components onto the minimized layers is first
formulated for bus-oriented escape routing.
Furthermore, an efficient approach is proposed to
minimize the number of the used layers for the given
escape buses with the consecutive constraints and
assign the escape buses onto the available layers.
Compared with Yan's approach [Yan and Chen 2012] for
the layer assignment of the linear escape buses with no
consecutive constraint and Ma's approach [Ma et
al.\ 2011a] for the layer assignment of the circular escape
buses with consecutive constraints, the experimental
results show that the proposed approach obtains the
same optimal results on the number of the used layers
and reduces 43.6\% and 90.5\% of CPU time for the
tested examples on the average, respectively.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% TODAES volume 22, number 3 (May 2017), article 46.
%%% Abstract markup: the two parenthesized expressions near the end are
%%% subtractions (58.6% - 6.5% = 52.1% and 45.5% - 2.5% = 43%), so they
%%% are typeset with math-mode minus signs rather than em-dashes.
@Article{Peng:2017:LSA,
author = "Yin-Chi Peng and Chien-Chih Chen and Hsiang-Jen Tsai
and Keng-Hao Yang and Pei-Zhe Huang and Shih-Chieh
Chang and Wen-Ben Jone and Tien-Fu Chen",
title = "{Leak Stopper}: an Actively Revitalized Snoop Filter
Architecture with Effective Generation Control",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "46:1--46:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3015770",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "To alleviate high energy dissipation of unnecessary
snooping accesses, snoop filters have been designed to
reduce snoop lookups. These filters have the problem of
decreasing filtering efficiency, and thus usually rely
on partial or whole filter reset by detecting block
evictions. Unfortunately, the reset conditions occur
infrequently or unevenly (called passive filter
deletion). This work proposes the concept of
revitalized snoop filter (RSF) design, which can
actively renew the destination filter by employing a
generation wrapping-around scheme for various reference
behaviors. We further utilize a sampling mechanism for
RSF to timely trigger precise filter revitalizations,
so that unnecessary RSF flushing can be minimized. The
proposed RSF can be integrated to various existent
inclusive snoop filters with only a minor change to
their designs. We evaluate our proposed design and
demonstrate that RSF eliminates 58.6\% of snoop energy
compared to JETTY on average while inducing only 6.5\%
of revitalization energy overhead. In addition, RSF
eliminates 45.5\% of snoop energy compared to stream
registers on average and only induces 2.5\% of
revitalization energy overhead. Overall, these RSFs
reduce the total L2 cache energy consumption by 52.1\%
(58.6\% $-$ 6.5\%) as compared to JETTY and by 43\%
(45.5\% $-$ 2.5\%) as compared to stream registers.
Furthermore, RSF improves the overall performance by
1\% to 1.4\% on average compared to JETTY and stream
registers for various benchmark suites.",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% TODAES volume 22, number 3 (May 2017), article 47.
%%% Title: no word is brace-protected here because the title contains
%%% no acronym or proper noun; "Macromodel" is an ordinary noun and is
%%% left to the bibliography style's casing rules.
@Article{Shi:2017:TAA,
author = "Guoyong Shi and Hanbin Hu and Shuwen Deng",
title = "Topological Approach to Automatic Symbolic
Macromodel Generation for Analog Integrated
Circuits",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "47:1--47:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3015782",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In the field of analog integrated circuit (IC) design,
small-signal macromodels play indispensable roles for
developing design insight and sizing reference.
However, the subject of automatically generating
symbolic low-order macromodels in human readable
circuit form has not been well studied. Traditionally,
work has been published on reducing full-scale symbolic
transfer functions to simpler forms but without the
guarantee of interpretability. On the other hand,
methodologies developed for interconnect circuits
(mainly resistor-capacitor-inductor (RCL) networks) are
not suitable for analog ICs. In this work, a
topological reduction method is introduced that is able
to automatically generate interpretable macromodel
circuits in symbolic form; that is, the circuit
elements in the compact model maintain analytical
relations of the parameters of the original full
circuit. This type of symbolic macromodel has several
benefits that other traditional modeling methods do not
offer: First, reusability, namely that designer need
not repeatedly generate macromodels for the same
circuit even it is re-sized or re-biased; second,
interpretability, namely a designer may directly
identify circuit parameters (in the original circuit)
that are closely related to the dominant frequency
characteristics, such as dc gain, gain/phase margins,
and dominant poles/zeros. The effectiveness and
computational efficiency of the proposed method have
been validated by several operational amplifier (opamp)
circuit examples.",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% TODAES volume 22, number 3 (May 2017), article 48.
@Article{Han:2017:CAB,
  author = {Han, Miseon and
    Han, Youngsun and
    Kim, Seon Wook and
    Lee, Hokyoon and
    Park, Il},
  title = {Content-Aware Bit Shuffling for Maximizing {PCM}
    Endurance},
  journal = j-TODAES,
  volume = 22,
  number = 3,
  pages = {48:1--48:??},
  month = may,
  year = 2017,
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3017445},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Recently, phase change memory (PCM) has been emerging
    as a strong replacement for DRAM owing to its many
    advantages such as nonvolatility, high capacity, low
    leakage power, and so on. However, PCM is still
    restricted for use as main memory because of its
    limited write endurance. There have been many methods
    introduced to resolve the problem by either reducing or
    spreading out bit flips. Although many previous studies
    have significantly contributed to reducing bit flips,
    they still have the drawback that lower bits are
    flipped more often than higher bits because the lower
    bits frequently change their bit values. Also,
    interblock wear-leveling schemes are commonly employed
    for spreading out bit flips by shifting input data, but
    they increase the number of bit flips per write. In
    this article, we propose a noble content-aware bit
    shuffling (CABS) technique that minimizes bit flips and
    evenly distributes them to maximize the lifetime of PCM
    at the bit level. We also introduce two additional
    optimizations, namely, addition of an inversion bit and
    use of an XOR key, to further reduce bit flips.
    Moreover, CABS is capable of recovering from stuck-at
    faults by restricting the change in values of stuck-at
    cells. Experimental results showed that CABS
    outperformed the existing state-of-the-art methods in
    the aspect of PCM lifetime extension with minimal
    overhead. CABS achieved up to 48.5\% enhanced lifetime
    compared to the data comparison write (DCW) method only
    with a few metadata bits. Moreover, CABS obtained
    approximately 9.7\% of improved write throughput than
    DCW because it significantly reduced bit flips and
    evenly distributed them. Also, CABS reduced about 5.4\%
    of write dynamic energy compared to DCW. Finally, we
    have also confirmed that CABS is fully applicable to
    BCH codes as it was able to reduce the maximum number
    of bit flips in metadata cells by 32.1\%.},
  acknowledgement = ack-nhfb,
  articleno = {48},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES volume 22, number 3 (May 2017), article 49.
%%% Abstract markup: the expansion of the SSAGA acronym is a
%%% parenthetical aside and is set off by a pair of em-dashes (---).
@Article{Saha:2017:SSS,
author = "Shamik Saha and Prabal Basu and Chidhambaranathan
Rajamanikkam and Aatreyi Bal and Koushik Chakraborty
and Sanghamitra Roy",
title = "{SSAGA}: {SMs} Synthesized for Asymmetric {GPGPU}
Applications",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "49:1--49:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3014163",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The emergence of GPGPU applications, bolstered by
flexible GPU programming platforms, has created a
tremendous challenge in maintaining high energy
efficiency in modern GPUs. In this article, we
demonstrate that customizing a Streaming Multiprocessor
(SM) of a GPU at a lower frequency is significantly
more energy efficient compared to employing DVFS on an
SM designed for a high-frequency operation. Using a
system-level CAD technique, we propose SSAGA---Streaming
Multiprocessors Synthesized for Asymmetric GPGPU
Applications---an energy-efficient GPU design paradigm.
SSAGA creates architecturally identical SM cores,
customized for different voltage-frequency domains. Our
rigorous cross-layer methodology demonstrates an
average of 20\% improvement in energy efficiency over a
spatially multitasking GPU across a range of GPGPU
applications.",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% TODAES volume 22, number 3 (May 2017), article 50.
@Article{Lu:2017:LPC,
  author = {Lu, Tiantao and
    Srivastava, Ankur},
  title = {Low-Power Clock Tree Synthesis for {$3$D-ICs}},
  journal = j-TODAES,
  volume = 22,
  number = 3,
  pages = {50:1--50:??},
  month = may,
  year = 2017,
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3019610},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {We propose efficient algorithms to construct a
    low-power clock tree for through-silicon-via
    (TSV)-based 3D-ICs. We use shutdown gates to save clock
    trees' dynamic power, which selectively turn off
    certain clock tree branches to avoid unnecessary clock
    activities when the modules in these tree branches are
    inactive. While this clock gating technique has been
    extensively studied in 2D circuits, its application in
    3D-ICs is unclear. In 3D-ICs, a shutdown gate is
    connected to a control signal unit through control
    TSVs, which may cause placement conflicts with existing
    clock TSVs in the layout due to TSV's large physical
    dimension. We develop a two-phase clock tree synthesis
    design flow for 3D-ICs: (1) 3D abstract clock tree
    generation based on K-means clustering and (2) clock
    tree embedding with simultaneous shutdown gates'
    insertion based on simulated annealing (SA) and a
    force-directed TSV placer. Experimental results
    indicate that (1) the K-means clustering heuristic
    significantly reduces the clock power by clustering
    modules with similar switching behavior and close
    proximity, and (2) the SA algorithm effectively inserts
    the shutdown gates to a 3D clock tree, while
    considering control TSV's placement. Compared with
    previous 3D clock tree synthesis techniques, our
    K-means clustering-based approach achieves larger
    reduction in clock tree power consumption while
    ensuring zero clock skew.},
  acknowledgement = ack-nhfb,
  articleno = {50},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES volume 22, number 3 (May 2017), article 51.
%%% Abstract markup: an em-dash (---) introduces the closing "namely"
%%% elaboration; the double hyphen in "scaling--based" is an en-dash
%%% joining a multi-word modifier and is left as is.
@Article{Lee:2017:TPT,
author = "Woojoo Lee and Kyuseung Han and Yanzhi Wang and
Tiansong Cui and Shahin Nazarian and Massoud Pedram",
title = "{TEI}-power: Temperature Effect Inversion-Aware
Dynamic Thermal Management",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "51:1--51:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3019941",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "FinFETs have emerged as a promising replacement for
planar CMOS devices in sub-20nm technology nodes.
However, based on the temperature effect inversion
(TEI) phenomenon observed in FinFET devices, the delay
characteristics of FinFET circuits in sub-, near-, and
superthreshold voltage regimes may be fundamentally
different from those of CMOS circuits with nominal
voltage operation. For example, FinFET circuits may run
faster in higher temperatures. Therefore, the existing
CMOS-based and TEI-unaware dynamic power and thermal
management techniques would not be applicable. In this
article, we present TEI-power, a dynamic voltage and
frequency scaling--based dynamic thermal management
technique that considers the TEI phenomenon and also
the superlinear dependencies of power consumption
components on the temperature and outlines a real-time
trade-off between delay and power consumption as a
function of the chip temperature to provide significant
energy savings, with no performance penalty---namely, up
to 42\% energy savings for small circuits where the
logic cell delay is dominant and up to 36\% energy
savings for larger circuits where the interconnect
delay is considerable.",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
%%% TODAES volume 22, number 3 (May 2017), article 52.
@Article{Lee:2017:UCP,
  author = {Lee, Yongje and
    Lee, Jinyong and
    Heo, Ingoo and
    Hwang, Dongil and
    Paek, Yunheung},
  title = {Using {CoreSight PTM} to Integrate {CRA} Monitoring
    {IPs} in an {ARM}-Based {SoC}},
  journal = j-TODAES,
  volume = 22,
  number = 3,
  pages = {52:1--52:??},
  month = may,
  year = 2017,
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3035965},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {The ARM CoreSight Program Trace Macrocell (PTM) has
    been widely deployed in recent ARM processors for
    real-time debugging and tracing of software. Using PTM,
    the external debugger can extract execution behaviors
    of applications running on an ARM processor. Recently,
    some researchers have been using this feature for other
    purposes, such as fault-tolerant computation and
    security monitoring. This motivated us to develop an
    external security monitor that can detect control
    hijacking attacks, of which the goal is to maliciously
    manipulate the control flow of victim applications at
    an attacker's disposal. This article focuses on
    detecting a special type of attack called code reuse
    attacks (CRA), which use a recently introduced
    technique that allows attackers to perform arbitrary
    computation without injecting their code by reusing
    only existing code fragments. Our external monitor is
    attached to the outside of the host system via the
    system bus and ARM CoreSight PTM, and is fed with
    execution traces of a victim application running on the
    host. As a majority of CRAs violates the normal
    execution behaviors of a program, our monitor
    constantly watches and analyzes the execution traces of
    the victim application and detects a symptom of attacks
    when the execution behaviors violate certain rules that
    normal applications are known to adhere. We present two
    different implementations for this purpose: a
    hardware-based solution in which all CRA detection
    components are implemented in hardware, and a
    hardware/software mixed solution that can be employed
    in a more resource-constrained environment where the
    deployment of full hardware-level CRA detection is
    burdensome.},
  acknowledgement = ack-nhfb,
  articleno = {52},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES volume 22, number 3 (May 2017), article 53.
@Article{Xue:2017:FCT,
  author = {Xue, Yuankun and
    Li, Ji and
    Nazarian, Shahin and
    Bogdan, Paul},
  title = {Fundamental Challenges Toward Making the {IoT} a
    Reachable Reality: a Model-Centric Investigation},
  journal = j-TODAES,
  volume = 22,
  number = 3,
  pages = {53:1--53:??},
  month = may,
  year = 2017,
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3001934},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Jul 21 10:49:30 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  abstract = {Constantly advancing integration capability is paving
    the way for the construction of the extremely large
    scale continuum of the Internet where entities or
    things from vastly varied domains are uniquely
    addressable and interacting seamlessly to form a giant
    networked system of systems known as the
    Internet-of-Things (IoT). In contrast to this visionary
    networked system paradigm, prior research efforts on
    the IoT are still very fragmented and confined to
    disjoint explorations of different applications,
    architecture, security, services, protocol, and
    economical domains, thus preventing design exploration
    and optimization from a unified and global perspective.
    In this context, this survey article first proposes a
    mathematical modeling framework that is rich in
    expressivity to capture IoT characteristics from a
    global perspective. It also sets forward a set of
    fundamental challenges in sensing, decentralized
    computation, robustness, energy efficiency, and
    hardware security based on the proposed modeling
    framework. Possible solutions are discussed to shed
    light on future development of the IoT system
    paradigm.},
  acknowledgement = ack-nhfb,
  articleno = {53},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
}
%%% TODAES volume 22, number 3 (May 2017), article 54.
%%% Abstract markup: the network named in the abstract takes its name
%%% from a person, so the final s carries a hacek accent, written as a
%%% BibTeX special-character group.
@Article{Guo:2017:OBP,
author = "Zimu Guo and Jia Di and Mark M. Tehranipoor and
Domenic Forte",
title = "Obfuscation-Based Protection Framework against Printed
Circuit Boards Unauthorized Operation and Reverse
Engineering",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "54:1--54:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3035482",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Printed circuit boards (PCBs) are a basic necessity
for all modern electronic systems but are becoming
increasingly vulnerable to cloning, overproduction,
tampering, and unauthorized operation. Most efforts to
prevent such attacks have only focused on the chip
level, leaving a void for PCBs and higher levels of
abstraction. In this article, we propose the first ever
obfuscation-based framework for the protection of PCBs.
Central to our approach is a permutation block that
hides the inter-chip connections between chips on the
PCB and is controlled by a key. If the correct key is
applied, then the correct connections between chips are
made. Otherwise, the connections are incorrectly
permuted, and the PCB/system fails to operate. We
propose a permutation network added to the PCB based on
a Bene{\v{s}} network that can easily be implemented in a
complex programmable logic device or field-programmable
gate arrays. Based on this implementation, we analyze
the security of our approach with respect to (i)
brute-force attempts to reverse engineer the PCB, (ii)
brute-force attempts at guessing the correct key, and
(iii) physical and logistic attacks by a range of
adversaries. Performance evaluation results on 12
reference designs show that brute force generally
requires prohibitive time to break the obfuscation. We
also provide detailed requirements for countermeasures
that prevent reverse engineering, unauthorized
operation, and so on, for different classes of
attackers.",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Torabi:2017:FHA,
author = "Mohammad Torabi and Lihong Zhang",
title = "A Fast Hierarchical Adaptive Analog Routing Algorithm
Based on Integer Linear Programming",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "55:1--55:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3035464",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The shrinking design window and high parasitic
sensitivity in advanced technologies have imposed
special challenges on analog and radio frequency (RF)
integrated circuit design. The state-of-the-art analog
routing research tends to favor linear programming to
achieve various analog constraints, which, although
effective, fails to offer high routing efficiency on its
own. In this article, we propose a new methodology to
address such a deficiency based on integer linear
programming (ILP) but without compromising the
capability of handling any special constraints for the
analog routing problems. Our proposed method supports
hierarchical routing, which can divide the entire
routing area into multiple small heterogeneous regions
where the ILP can efficiently derive routing solutions.
Distinct from the conventional methods, our algorithm
utilizes adaptive resolutions for various routing
regions. For a more congested region, a routing grid
with higher resolution is employed, whereas a
lower-resolution grid is adopted to a less-crowded
routing region. For a large empty space, routing
efficiency can be even boosted by creating more routing
hierarchy levels. This scheme is especially beneficial
to the analog and RF layouts, which are far sparser
than their digital counterparts. The experimental
results show that our proposed adaptive ILP-based
router is much faster than the conventional ones, since
it spends much less time in the areas that need no
accurate routing anyway. The higher efficiency is
demonstrated for large circuits and especially sparse
layouts along with promising routing quality in terms
of analog constraints.",
acknowledgement = ack-nhfb,
articleno = "55",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Song:2017:STV,
author = "Yang Song and Kambiz Samadi and Bill Lin",
title = "A Single-Tier Virtual Queuing Memory Controller
Architecture for Heterogeneous {MPSoCs}",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "56:1--56:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3035481",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Heterogeneous MPSoCs typically integrate diverse
cores, including application CPUs, GPUs, and HD coders.
These cores commonly share an off-chip memory to save
cost and energy, but their memory accesses often
interfere with each other, leading to undesirable
consequences like a slowdown of application performance
or a failure to sustain real-time performance. The
memory controller plays a central role in meeting the
QoS needs of real-time cores while maximizing CPU
performance. Previous QoS-aware memory controllers are
based on a classic two-tier queuing architecture that
buffers memory transactions at the first tier, followed
by a second tier that buffers translated DRAM commands.
In these designs, QoS-aware policies are used to
schedule competing transactions at the first stage, but
the translated DRAM commands are served in FIFO order
at the second stage. Unfortunately, once the scheduled
transactions have been forwarded to the command stage,
newly arriving transactions that may be more critical
cannot be served ahead of those translated commands
that are already queued at the second stage. To address
this, we propose a scalable memory controller
architecture based on single-tier virtual queuing
(STVQ) that maintains a single tier of request queues
and employs an efficacious scheduler that considers
both QoS requirements and DRAM bank states. In
comparison with previous QoS-aware memory controllers,
the proposed STVQ memory controller reduces CPU
slowdown by up to 13.9\% while satisfying all frame
rate requirements. We propose further optimizations
that can significantly increase row-buffer hits by up
to 66.2\% and reduce memory latency by up to 19.8\%.",
acknowledgement = ack-nhfb,
articleno = "56",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2017:ASE,
author = "Ji Li and Jeffrey Draper",
title = "Accelerated Soft-Error-Rate {(SER)} Estimation for
Combinational and Sequential Circuits",
journal = j-TODAES,
volume = "22",
number = "3",
pages = "57:1--57:??",
month = may,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3035496",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 21 10:49:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Radiation-induced soft errors have posed an increasing
reliability challenge to combinational and sequential
circuits in advanced CMOS technologies. Therefore, it
is imperative to devise fast, accurate and scalable
soft error rate (SER) estimation methods as part of
cost-effective robust circuit design. This paper
presents an efficient SER estimation framework for
combinational and sequential circuits, which considers
single-event transients (SETs) in combinational logic
and multiple cell upsets (MCUs) in sequential elements.
A novel top-down memoization algorithm is proposed to
accelerate the propagation of SETs, and a general
schematic and layout co-simulation approach is proposed
to model the MCUs for redundant sequential storage
structures. The feedback in sequential logic is
analyzed with an efficient time frame expansion method.
Experimental results on various ISCAS85 combinational
benchmark circuits demonstrate that the proposed
approach achieves up to 560.2$\times$ speedup with less
than 3\% difference in terms of SER results compared
with the baseline algorithm. The average runtime of the
proposed framework on a variety of ISCAS89 benchmark
circuits is 7.20 s, and the runtime is 119.23 s for the
largest benchmark circuit with more than 3,000
flip-flops and 17,000 gates.",
acknowledgement = ack-nhfb,
articleno = "57",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yan:2017:EEE,
author = "Kaige Yan and Lu Peng and Mingsong Chen and Xin Fu",
title = "Exploring Energy-Efficient Cache Design in Emerging
Mobile Platforms",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "58:1--58:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/2843940",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Mobile devices are quickly becoming the most widely
used processors in consumer devices. Since their major
power supply is battery, energy-efficient computing is
highly desired. In this article, we focus on
energy-efficient cache design in emerging mobile
platforms. We observe that more than 40\% of L2 cache
accesses are OS kernel accesses in interactive
smartphone applications. Such frequent kernel accesses
cause serious interferences between the user and kernel
blocks in the L2 cache, leading to unnecessary block
replacements and high L2 cache miss rate. We first
propose to statically partition the L2 cache into two
separate segments, which can be accessed only by the
user code and kernel code, respectively. Meanwhile, the
overall size of the two segments is shrunk, which
reduces the energy consumption while still maintaining
a similar cache miss rate. We then find completely
different access behaviors between the two separated
kernel and user segments and explore the
multi-retention STT-RAM-based user and kernel segments
to obtain higher energy savings in this static
partition-based cache design. Finally, we propose to
dynamically partition the L2 cache into the user and
kernel segments to minimize overall cache size. We also
integrate the short-retention STT-RAM into this dynamic
partition-based cache design for maximal energy
savings. The experimental results show that our static
technique reduces cache energy consumption by 75\% with
2\% performance loss, and our dynamic technique further
shows strong capability to reduce cache energy
consumption by 85\% with only 3\% performance loss.",
acknowledgement = ack-nhfb,
articleno = "58",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kim:2017:SBS,
author = "Taehyun Kim and Jongbum Lim and Jinku Kim and
Woo-Cheol Cho and Eui-Young Chung and Hyuk-Jun Lee",
title = "Scalable Bandwidth Shaping Scheme via Adaptively
Managed Parallel Heaps in Manycore-Based Network
Processors",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "59:1--59:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3065926",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Scalability of network processor-based routers heavily
depends on limitations imposed by memory accesses and
associated power consumption. Bandwidth shaping of a
flow is a key function, which requires a token bucket
per output queue and abuses memory bandwidth. As the
number of output queues increases, managing token
buckets becomes prohibitively expensive and limits
scalability. In this work, we propose a scalable
software-based token bucket management scheme that can
reduce memory accesses and power consumption
significantly. To satisfy real-time and low-cost
constraints, we propose novel parallel heap data
structures running on a manycore-based network
processor. By using cache locking, the performance of
heap processing is enhanced significantly and is more
predictable. In addition, we quantitatively analyze the
performance and memory footprint of the proposed
software scheme using stochastic modeling and the
Lyapunov central limit theorem. Finally, the proposed
scheme provides an adaptive method to limit the size of
heaps in the case of oversubscribed queues, which can
successfully isolate the queues showing unideal
behavior. The proposed scheme reduces memory accesses
by up to three orders of magnitude for one million
queues sharing a 100 Gbps interface of the router while
maintaining stability under stressful scenarios.",
acknowledgement = ack-nhfb,
articleno = "59",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Agrawal:2017:OSA,
author = "Prabhav Agrawal and Mike Broxterman and Biswadeep
Chatterjee and Patrick Cuevas and Kathy H. Hayashi and
Andrew B. Kahng and Pranay K. Myana and Siddhartha
Nath",
title = "Optimal Scheduling and Allocation for {IC} Design
Management and Cost Reduction",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "60:1--60:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3035483",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A large semiconductor product company spends hundreds
of millions of dollars each year on design
infrastructure to meet tapeout schedules for multiple
concurrent projects. Resources (servers, electronic
design automation tool licenses, engineers, and so on)
are limited and must be shared --- and the cost per day
of schedule slip can be enormous. Co-constraints
between resource types (e.g., one license per every two
cores (threads)) and dedicated versus shareable
resource pools make scheduling and allocation hard. In
this article, we formulate two mixed integer-linear
programs for optimal multi-project, multi-resource
allocation with task precedence and resource
co-constraints. Application to a real-world
three-project scheduling problem extracted from a
leading-edge design center of anonymized Company X
shows substantial compute and license cost savings.
Compared to the product company, our solution shows
that the makespan of schedule of all projects can be
reduced by seven days, which not only saves
$\approx$2.7\% of annual labor and infrastructure costs
but also enhances market competitiveness. We also
demonstrate the capability of scheduling over two dozen
chip development projects at the design center level,
subject to resource and datacenter capacity limits as
well as per-project penalty functions for schedule
slips. The design center ended up purchasing 600
additional servers, whereas our solution demonstrates
that the schedule can be met without having to purchase
any additional servers. Application to a four-project
scheduling problem extracted from a leading-edge design
center in a non-US location shows availability of up to
$\approx$37\% headcount reduction during a half-year
schedule for just one type of chip design activity.",
acknowledgement = ack-nhfb,
articleno = "60",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Isenberg:2017:PCH,
author = "Tobias Isenberg and Marco Platzner and Heike Wehrheim
and Tobias Wiersema",
title = "Proof-Carrying Hardware via Inductive Invariants",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "61:1--61:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3054743",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Proof-carrying hardware (PCH) is a principle for
achieving safety for dynamically reconfigurable
hardware systems. The producer of a hardware module
spends huge effort when creating a proof for a safety
policy. The proof is then transferred as a certificate
together with the configuration bitstream to the
consumer of the hardware module, who can quickly verify
the given proof. Previous work utilized SAT solvers and
resolution traces to set up a PCH technology and
corresponding tool flows. In this article, we present a
novel technology for PCH based on inductive invariants.
For sequential circuits, our approach is fundamentally
stronger than the previous SAT-based one since we avoid
the limitations of bounded unrolling. We contrast our
technology to existing ones and show that it fits into
previously proposed tool flows. We conduct experiments
with four categories of benchmark circuits and report
consumer and producer runtime and peak memory
consumption, as well as the size of the certificates
and the distribution of the workload between producer
and consumer. Experiments clearly show that our new
induction-based technology is superior for sequential
circuits, whereas the previous SAT-based technology is
the better choice for combinational circuits.",
acknowledgement = ack-nhfb,
articleno = "61",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bonetti:2017:AID,
author = "Andrea Bonetti and Nicholas Preyss and Adam Teman and
Andreas Burg",
title = "Automated Integration of Dual-Edge Clocking for
Low-Power Operation in Nanometer Nodes",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "62:1--62:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3054744",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Clocking power, including both clock distribution and
registers, has long been one of the primary factors in
the total power consumption of many digital systems.
One straightforward approach to reduce this power
consumption is to apply dual-edge-triggered (DET)
clocking, as sequential elements operate at half the
clock frequency while maintaining the same throughput
as with conventional single-edge-triggered (SET)
clocking. However, the DET approach is rarely taken in
modern integrated circuits, primarily due to the
perceived complexity of integrating such a clocking
scheme. In this article, we first identify the most
promising conditions for achieving low-power operation
with DET clocking and then introduce a fully automated
design flow for applying DET to a conventional SET
design. The proposed design flow is demonstrated on
three benchmark circuits in a 40 nm CMOS technology,
providing as much as a 50\% reduction in clock
distribution and register power consumption.",
acknowledgement = ack-nhfb,
articleno = "62",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2017:DMF,
author = "Katherine Shu-Min Li and Sying-Jyan Wang",
title = "Design Methodology of Fault-Tolerant Custom {$3$D}
Network-on-Chip",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "63:1--63:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3054745",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A systematic design methodology is presented for
custom Network-on-Chip (NoC) in three-dimensional
integrated circuits (3D-ICs). In addition, fault
tolerance is supported in the NoC if extra links are
included in the NoC topology. In the proposed method,
processors and the communication architecture are
synthesized simultaneously in the 3D floorplanning
process. 3D-IC technology enables ICs to be implemented
in smaller size with higher performance; on the flip
side, 3D-ICs suffer yield loss due to multiple dies in
a 3D stack and lower manufacturing yield of
through-silicon vias (TSVs). To alleviate this problem,
a known-good-dies (KGD) test can be applied to ensure
every die to be packaged into a 3D-IC is fault-free.
However, faulty TSVs cannot be tested in the KGD test.
In this article, the proposed method deals with the
problem by providing fault tolerance in the NoC
topology. The efficiency of the proposed method is
evaluated using several benchmark circuits, and the
experimental results show that the proposed method
produces 3D NoCs with performance comparable to
previous methods when fault-tolerant features are not
realized. With fault tolerance in NoCs, higher yield
can be achieved at the cost of performance penalty and
elevated power level.",
acknowledgement = ack-nhfb,
articleno = "63",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pagliari:2017:AEE,
author = "Daniele Jahier Pagliari and Enrico Macii and Massimo
Poncino",
title = "Approximate Energy-Efficient Encoding for Serial
Interfaces",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "64:1--64:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3041220",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Serial buses are ubiquitous interconnections in
embedded computing systems that are used to interface
processing elements with peripherals, such as sensors,
actuators, and I/O controllers. Despite their limited
wiring, as off-chip connections they can account for a
significant amount of the total power consumption of a
system-on-chip device. Encoding the information sent on
these buses is the most intuitive and affordable way to
reduce their power contribution; moreover, the encoding
can be made even more effective by exploiting the fact
that many embedded applications can tolerate
intermediate approximations without a significant
impact on the final quality of results, thus trading
off accuracy for power consumption. We propose a simple
yet very effective approximate encoding for reducing
dynamic energy in serial buses. Our approach uses
differential encoding as a baseline scheme and extends
it with bounded approximations to overcome the
intrinsic limitations of differential encoding for data
with low temporal correlation. We show that the
proposed scheme, in addition to yielding extremely
compact codecs, is superior to all state-of-the-art
approximate serial encodings over a wide set of traces
representing data received or sent from/to sensors or
actuators.",
acknowledgement = ack-nhfb,
articleno = "64",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Schafer:2017:PHL,
author = "Benjamin Carrion Schafer",
title = "Parallel High-Level Synthesis Design Space Exploration
for Behavioral {IPs} of Exact Latencies",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "65:1--65:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3041219",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "This work presents a Design Space Exploration (DSE)
method for Behavioral IPs (BIPs) given in ANSI-C or
SystemC to find the smallest micro-architecture for a
specific target latency. Previous work on High-Level
Synthesis (HLS) DSE mainly focused on finding a
tradeoff curve with Pareto-optimal designs. HLS is,
however, a single process (component) synthesis method.
Very often, the latency of the components requires a
specific fixed latency when inserted within a larger
system. This work presents a fast multi-threaded method
to find the smallest micro-architecture for a given BIP
and target latency by discriminating between all
different exploration knobs and exploring these
concurrently. Experimental results show that our
proposed method is very effective and comprehensive
results compare the quality of results vs. the speedup
of our proposed explorer.",
acknowledgement = ack-nhfb,
articleno = "65",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Moudallal:2017:GCC,
author = "Zahi Moudallal and Farid N. Najm",
title = "Generating Current Constraints to Guarantee {RLC}
Power Grid Safety",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "66:1--66:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3054746",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A critical task during early chip design is the
efficient verification of the chip power distribution
network. Vectorless verification, developed since the
mid-2000s as an alternative to traditional
simulation-based methods, requires the user to specify
current constraints (budgets) for the underlying
circuitry and checks if the corresponding voltage
variations on all grid nodes are within a
user-specified margin. This framework is extremely
powerful, as it allows for efficient and early
verification, but specifying/obtaining current
constraints remains a burdensome task for users and a
hurdle to adoption of this framework by the industry.
Recently, the inverse problem has been introduced:
Generate circuit current constraints that, if satisfied
by the underlying logic circuitry, would guarantee grid
safety from excessive voltage variations. This approach
has many potential applications, including various grid
quality metrics, as well as voltage drop-aware
placement and floorplanning. So far, this framework has
been developed assuming only resistive and capacitive
(RC) elements in the power grid model. Inductive
effects are becoming a significant component of the
power supply noise and can no longer be ignored. In
this article, we extend the constraints generation
approach to allow for inductance. We give a rigorous
problem definition and develop some key theoretical
results related to maximality of the current space
defined by the constraints. Based on this, we then
develop three constraints generation algorithms that
target the peak total chip power that is allowed by the
grid, the uniformity of current distribution across the
die area, and a combination of both metrics.",
acknowledgement = ack-nhfb,
articleno = "66",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2017:TMR,
author = "Irith Pomeranz and M. Enamul Amyeen and Srikanth
Venkataraman",
title = "Test Modification for Reduced Volumes of Fail Data",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "67:1--67:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3065925",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As part of a yield improvement process, fail data is
collected from faulty units. Several approaches exist
for reducing the tester time and the volume of fail
data that needs to be collected based on the
observation that a subset of the fail data is
sufficient for accurate defect diagnosis. This article
addresses the volume of fail data by considering the
test set that is used for collecting fail data. It
observes that certain faults from a set of target
faults produce significantly larger numbers of faulty
output values (and therefore significantly larger
volumes of fail data) than other faults under a given
test set. Based on this observation, it describes a
procedure for modifying the test set to reduce the
maximum number of faulty output values that a target
fault produces. When defects are considered in a
simulation experiment, and a defect diagnosis procedure
is applied to the fail data that they produce, two
effects are observed: the maximum and average numbers
of faulty output values per defect are reduced
significantly with the modified test set, and the
quality of diagnosis is similar or even improved with
the modified test set.",
acknowledgement = ack-nhfb,
articleno = "67",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2017:MSS,
author = "Ya Wang and Di Gao and Dani Tannir and Ning Dong and
G. Peter Fang and Wei Dong and Peng Li",
title = "Multiharmonic Small-Signal Modeling of Low-Power {PWM}
{DC-DC} Converters",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "68:1--68:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3057274",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Small-signal models of pulse-width modulation (PWM)
converters are widely used for analyzing stability and
play an important role in converter design and control.
However, existing small-signal models either are based
on averaged DC behaviors, and hence are unable to
capture frequency responses that are faster than the
switching frequency, or greatly approximate these
high-frequency responses. We address the severe
limitations of the existing models by proposing a
multiharmonic model that provides a complete
small-signal characterization of both DC averages and
high-order harmonic responses. The proposed model
captures important high-frequency overshoots and
undershoots of the converter response, which are
otherwise unaccounted for by the existing techniques.
In two converter examples, the proposed model corrects
the misleading results of the existing models by
providing truthful characterization of the overall
converter AC response and offers important guidance for
converter design and closed-loop control.",
acknowledgement = ack-nhfb,
articleno = "68",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Albalawi:2017:TFP,
author = "Hassan Albalawi and Yuanning Li and Xin Li",
title = "Training Fixed-Point Classifiers for On-Chip Low-Power
Implementation",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "69:1--69:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3057275",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we develop several novel algorithms
to train classifiers that can be implemented on chip
with low-power fixed-point arithmetic with extremely
small word length. These algorithms are based on Linear
Discriminant Analysis (LDA), Support Vector Machine
(SVM), and Logistic Regression (LR), and are referred
to as LDA-FP, SVM-FP, and LR-FP, respectively. They
incorporate the nonidealities (i.e., rounding and
overflow) associated with fixed-point arithmetic into
the offline training process so that the resulting
classifiers are robust to these nonidealities.
Mathematically, LDA-FP, SVM-FP, and LR-FP are
formulated as mixed integer programming problems that
can be robustly solved by the branch-and-bound methods
described in this article. Our numerical experiments
demonstrate that LDA-FP, SVM-FP, and LR-FP
substantially outperform the conventional approaches
for the emerging biomedical applications of brain
decoding.",
acknowledgement = ack-nhfb,
articleno = "69",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hoveida:2017:EMA,
author = "Mohaddeseh Hoveida and Fatemeh Aghaaliakbari and Ramin
Bashizade and Mohammad Arjomand and Hamid
Sarbazi-Azad",
title = "Efficient Mapping of Applications for Future
Chip-Multiprocessors in Dark Silicon Era",
journal = j-TODAES,
volume = "22",
number = "4",
pages = "70:1--70:??",
month = jul,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3055202",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:32 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The failure of Dennard scaling has led to the
utilization wall that is the source of dark silicon and
limits the percentage of a chip that can actively
switch within a given power budget. To address this
issue, a structure is needed to guarantee the limited
power budget along with providing sufficient
flexibility and performance for different applications
with various communication requirements. In this
article, we present a general-purpose platform for
future many-core Chip-Multiprocessors (CMPs) that
benefits from the advantages of clustering,
Network-on-Chip (NoC) resource sharing among cores, and
power gating the unused components of clusters. We also
propose two task mapping methods for the proposed
platform in which active and dark cores are dispersed
appropriately, so that an excess of power budget can be
obtained. Our evaluations reveal that the first and
second proposed mapping mechanisms respectively reduce
the execution time by up to 28.6\% and 39.2\% and the
NoC power consumption by up to 11.1\% and 10\%, and
gain an excess power budget of up to 7.6\% and 13.4\%
over the baseline architecture.",
acknowledgement = ack-nhfb,
articleno = "70",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Saha:2017:STS,
  author          = {Sangeet Saha and Arnab Sarkar and Amlan Chakrabarti},
  title           = {Spatio-Temporal Scheduling of Preemptive Real-Time
    Tasks on Partially Reconfigurable Systems},
  journal         = j-TODAES,
  volume          = {22},
  number          = {4},
  pages           = {71:1--71:??},
  articleno       = {71},
  month           = jul,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3056561},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:32 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Reconfigurable devices that promise to offer the twin
    benefits of flexibility as in general-purpose
    processors along with the efficiency of dedicated
    hardwares often provide a lucrative solution for many
    of today's highly complex real-time embedded systems.
    However, online scheduling of dynamic hard real-time
    tasks on such systems with efficient resource
    utilization in terms of both space and time poses an
    enormously challenging problem. We attempt to solve
    this problem using a combined offline-online approach.
    The offline component generates and stores various
    optional feasible placement solutions for different
    sub-sets of tasks that may possibly be co-mapped
    together. Given a set of periodic preemptive real-time
    tasks that requires to be executed at runtime, the
    online scheduler first carries out an admission control
    procedure and then produces a schedule, which is
    guaranteed to meet all timing constraints provided it
    is spatially feasible to place designated subsets of
    these tasks at specified scheduling points within a
    future time interval. These feasibility checks are done
    and actual placement solutions are obtained through a
    low overhead search of the statically precomputed
    placement solutions. Based on this approach, we have
    proposed a periodic preemptive real-time scheduling
    methodology for runtime partially reconfigurable
    devices. Effectiveness of the proposed strategy has
    been verified through simulation based experiments and
    we observed that the strategy achieves high resource
    utilization with low task rejection rates over various
    simulation scenarios.},
}
@Article{Abella:2017:MBW,
  author          = {Jaume Abella and Maria Padilla and
    Joan {Del Castillo} and Francisco J. Cazorla},
  title           = {Measurement-Based Worst-Case Execution Time Estimation
    Using the Coefficient of Variation},
  journal         = j-TODAES,
  volume          = {22},
  number          = {4},
  pages           = {72:1--72:??},
  articleno       = {72},
  month           = jul,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3065924},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:32 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Extreme Value Theory (EVT) has been historically used
    in domains such as finance and hydrology to model
    worst-case events (e.g., major stock market
    incidences). EVT takes as input a sample of the
    distribution of the variable to model and fits the tail
    of that sample to either the Generalised Extreme Value
    (GEV) or the Generalised Pareto Distribution (GPD).
    Recently, EVT has become popular in real-time systems
    to derive worst-case execution time (WCET) estimates of
    programs. However, the application of EVT is not
    straightforward and requires a detailed analysis of,
    and customisation for, the particular problem at hand.
    In this article, we tailor the application of EVT to
    timing analysis. To that end, (1) we analyse the
    response time of different hardware resources (e.g.,
    cache memories) and identify those that may lead to
    radically different types of execution time
    distributions. (2) We show that one of these
    distributions, known as mixture distribution, causes
    problems in the use of EVT. In particular, mixture
    distributions challenge not only properly selecting
    GEV/GPD parameters (i.e., location, scale and shape)
    but also determining the size of the sample to ensure
    that enough tail values are passed to EVT and that only
    tail values are used by EVT to fit GEV/GPD. Failing to
    select these parameters has a negative impact on the
    quality of the derived WCET estimates. We tackle these
    problems, by (3) proposing Measurement-Based
    Probabilistic Timing Analysis using the Coefficient of
    Variation (MBPTA-CV), a new mixture-distribution aware,
    WCET-suited MBPTA method that builds on recent EVT
    developments in other fields (e.g., finance) to
    automatically select the distribution parameters that
    best fit the maxima of the observed execution times.
    Our results on a simulation environment and a real
    board show that MBPTA-CV produces high-quality WCET
    estimates.},
}
%%% Title acronym is spelled ``NoC-HMP'' (NoC = Network on Chip) to agree
%%% with the abstract, which names the system ``NoC-HMP'' throughout; the
%%% imported metadata had ``Noc-HMP''.  Confirm against the published
%%% title page if in doubt.
@Article{Salcic:2017:NHH,
  author          = {Zoran Salcic and Heejong Park and J{\"u}rgen Teich and
    Avinash Malik and Muhammad Nadeem},
  title           = {{NoC-HMP}: a Heterogeneous Multicore Processor for
    Embedded Systems Designed in {SystemJ}},
  journal         = j-TODAES,
  volume          = {22},
  number          = {4},
  pages           = {73:1--73:??},
  articleno       = {73},
  month           = jul,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3073416},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:32 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Scalability and performance in multicore processors
    for embedded and real-time systems usually don't go
    well each with the other. Networks on Chip (NoCs)
    provide scalable execution platforms suitable for such
    kind of embedded systems. This article presents a
    NoC-based Heterogeneous Multi-Processor system, called
    NoC-HMP, which is a scalable platform for embedded
    systems developed in the GALS language SystemJ. NoC-HMP
    uses a time-predictable TDMA-MIN NoC to guarantee
    latencies and communication time between the two types
    of time-predictable cores and can be customized for a
    specific performance goal through the execution
    strategy and scheduling of SystemJ program deployed
    across multiple cores. Examples of different execution
    strategies are introduced, explored and analyzed via
    measurements. The number of used cores can be minimized
    to achieve the target performance of the application.
    TDMA-MIN allows easy extensions of NoC-HMP with other
    cores or IP blocks. Experiments show a significant
    improvement of performance over a single core system
    and demonstrate how the addition of cores affects the
    performance of the designed system.},
}
%%% Abstract: the criticality-level count m is a mathematical variable and
%%% is therefore typeset in math mode ($m$) rather than as a text letter.
@Article{Behera:2017:TTS,
  author          = {Lalatendu Behera and Purandar Bhaduri},
  title           = {Time-Triggered Scheduling of Mixed-Criticality Systems},
  journal         = j-TODAES,
  volume          = {22},
  number          = {4},
  pages           = {74:1--74:??},
  articleno       = {74},
  month           = jul,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3073415},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:32 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Real-time and embedded systems are moving from the
    traditional design paradigm to integration of multiple
    functionalities onto a single computing platform. Some
    of the functionalities are safety critical and subject
    to certification. The rest of the functionalities are
    nonsafety critical and do not need to be certified.
    Designing efficient scheduling algorithms which can be
    used to meet the certification requirement is
    challenging. Our research considers the time-triggered
    approach to scheduling of mixed-criticality jobs with
    two criticality levels. The first proposed algorithm
    for the time-triggered approach is based on the OCBP
    scheduling algorithm which finds a fixed-priority order
    of jobs. Based on this priority order, the existing
    algorithm constructs two scheduling tables
    S$_{LO}^{oc}$ and S$_{HI}^{oc}$. The scheduler uses
    these tables to find a scheduling strategy. Another
    time-triggered algorithm called MCEDF was proposed as
    an improvement over the OCBP-based algorithm. Here we
    propose an algorithm which directly constructs two
    scheduling tables without using a priority order.
    Furthermore, we show that our algorithm schedules a
    strict superset of instances which can be scheduled by
    the OCBP-based algorithm as well as by MCEDF. We show
    that our algorithm outperforms both the OCBP-based
    algorithm and MCEDF in terms of the number of instances
    scheduled in a randomly generated set of instances. We
    generalize our algorithm for jobs with $m$ criticality
    levels. Subsequently, we extend our algorithm to find
    scheduling tables for periodic and dependent jobs.
    Finally, we show that our algorithm is also applicable
    to mixed-criticality synchronous programs upon
    uniprocessor platforms and schedules a bigger set of
    instances than the existing algorithm.},
}
%%% Abstract: the grid dimension is written as one math expression,
%%% $ K \times K $, so that both operands are set in math italic; the
%%% imported text had only the operator inside math mode.
@Article{Liu:2017:ILA,
  author          = {Derong Liu and Bei Yu and Salim Chowdhury and
    David Z. Pan},
  title           = {Incremental Layer Assignment for Timing Optimization},
  journal         = j-TODAES,
  volume          = {22},
  number          = {4},
  pages           = {75:1--75:??},
  articleno       = {75},
  month           = jul,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3083727},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:32 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {With VLSI technology nodes scaling into the nanometer
    regime, interconnect delay plays an increasingly
    critical role in timing. For layer assignment, most
    works deal with via counts or total net delays,
    ignoring critical paths of each net and resulting in
    potential timing issues. In this article, we propose an
    incremental layer assignment framework targeting delay
    optimization in timing the critical path of each net. A
    set of novel techniques are presented: self-adaptive
    quadruple partition based on $ K \times K $ division
    benefits the runtime; semidefinite programming is
    utilized for each partition; and the sequential mapping
    algorithm guarantees integer solutions while satisfying
    edge capacities; additionally, concurrent mapping
    offers a global view of assignment and post delay
    optimization reduces the path timing violations. The
    effectiveness of our work is verified by ISPD'08
    benchmarks.},
}
@Article{Bi:2017:OQE,
  author          = {Zhaori Bi and Dian Zhou and Sheng-Guo Wang and
    Xuan Zeng},
  title           = {Optimization and Quality Estimation of Circuit Design
    via Random Region Covering Method},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {1:1--1:??},
  articleno       = {1},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3084685},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Random region covering is a global optimization
    technique that explores the landscape by introducing
    multiple random starting points to initiate the local
    optimization solvers. This study applies the random
    region covering technique to circuit design automation
    and proposes a theory to explain why this technique is
    efficient at searching for the global optimum. In
    addition to analyzing the efficiency of the random
    region covering algorithm, the theory gives a
    probability-based estimation of the goodness of the
    optimization result. To enhance the efficiency of the
    random region covering technique, this work evaluates
    the boundary of top performance regions and proposes a
    modified random region covering method that only
    performs the global optimization on the top design
    region. The results from a large number of mathematical
    experiments verify the proposed methodology. The
    optimized designs of a class-E power amplifier and a
    wide load range operational amplifier outperform both
    manual designs and other state-of-the-art optimization
    techniques.},
}
%%% Author list: the fourth author's initials are written ``T. M.'' so that
%%% BibTeX parses and abbreviates them as two initials; the imported
%%% metadata had the glued, mis-cased form ``Tm''.
@Article{Jeong:2017:CSP,
  author          = {Jae Woong Jeong and Vishwanath Natarajan and
    Shreyas Sen and T. M. Mak and Jennifer Kitchen and Sule Ozev},
  title           = {A Comprehensive {BIST} Solution for Polar Transceivers
    Using On-Chip Resources},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {2:1--2:??},
  articleno       = {2},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3084689},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {This article presents a Built-in self-test (BIST)
    solution for polar transceivers with low cost and high
    accuracy. Radio frequency (RF) Polar transceivers are
    desirable for portable devices due to higher power
    efficiency compared to traditional RF Cartesian
    transceivers. Unfortunately, their design is quite
    challenging due to substantially different signal paths
    that need to work coherently to ensure signal quality.
    In the receiver, phase and gain mismatches degrade
    sensitivity and error vector magnitude. In the
    transmitter, delay skew between the envelope and phase
    signals and the finite envelope bandwidth can create
    intermodulation distortion, which leads to violation of
    spectral mask requirements. Typically, these parameters
    are not directly measured but calibrated through
    spectral analysis using expensive RF equipment, leading
    to lengthy and costly measurement/calibration cycles.
    However, characterization and calibration of these
    parameters with analytical model would reduce the test
    time and cost considerably. In this article, we propose
    a technique to measure with the intent to calibrate
    impairments of the polar transceiver in the loop-back
    mode. Simulation and hardware measurement results show
    that the proposed technique can characterize the
    targeted impairments accurately.},
}
@Article{Vatanparvar:2017:EVO,
  author          = {Korosh Vatanparvar and
    Mohammad Abdullah {Al Faruque}},
  title           = {Electric Vehicle Optimized Charge and Drive Management},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {3:1--3:??},
  articleno       = {3},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3084686},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Electric vehicles (EVs) have been considered as a
    solution to the environmental issues caused by
    transportation, such as air pollution and greenhouse
    gas emission. However, limited energy capacity, scarce
    EV supercharging stations, and long recharging time
    have brought anxiety to drivers who use EVs as their
    main mean of transportation. Furthermore, EV owners
    need to deal with a huge battery replacement cost when
    the battery capacity degrades. Yet in-house EV chargers
    affect the pattern of the power grid load, which is not
    favorable to the utilities. The driving route,
    departure/arrival time of daily trips, and electricity
    price influence the EV energy consumption, battery
    lifetime, electricity cost, and EV charger load on the
    power grid. The EV driving range and battery lifetime
    issues have been addressed by battery management
    systems and route optimization methodologies. However,
    in this article, we are proposing an optimized charge
    and drive management (OCDM) methodology that selects
    the optimal driving route, schedules daily trips, and
    optimizes the EV charging process while considering the
    driver's timing preference. Our methodology will
    improve the EV driving range, extend the battery
    lifetime, reduce the recharging cost, and diminish the
    influence of EV chargers on the power grid. The
    performance of our methodology compared to the state of
    the art have been analyzed by experimenting on three
    benchmark EVs and three drivers. Our methodology has
    decreased EV energy consumption by 27\%, improved the
    battery lifetime by 24.8\%, reduced the electricity
    cost by 35\%, and diminished the power grid peak load
    by 17\% while increasing less than 20 minutes of daily
    driving time. Moreover, the scalability of our OCDM
    methodology for different parameters (e.g., time
    resolution and multiday cycles) in terms of execution
    time and memory usage has been analyzed.},
}
@Article{Wang:2017:WPL,
  author          = {Shuai Wang and Guangshan Duan and Yupeng Li and
    Qianhao Dong},
  title           = {Word- and Partition-Level Write Variation Reduction
    for Improving Non-Volatile Cache Lifetime},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {4:1--4:??},
  articleno       = {4},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3084690},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Non-volatile memory technologies are among the most
    promising technologies for implementing the main
    memories and caches in future microprocessors and
    replacing the traditional DRAM and SRAM technologies.
    However, one of the most challenging design issues of
    the non-volatile memory technologies is the limited
    write. In this article, we first propose to exploit the
    narrow-width values to improve the lifetime of
    non-volatile last-level caches with word-level write
    variation reduction. Leading zeros masking scheme is
    proposed to reduce the write stress to the upper half
    of the narrow-width data. To balance the write
    variations between the upper half and the lower half of
    the narrow-width data, two swapping schemes, the swap
    on write (SW) and swap on replacement (SRepl), are
    proposed. Two existing optimization schemes, the
    multiple dirty bit (MDB) and read before write (RBW),
    are adopted with our word-level swapping design. To
    further reduce the write variation on the partition
    level, we propose to exploit the cache partitioning
    design to improve the lifetime. Based on the
    observation that different applications demonstrate
    different cache access (write) behaviors, we propose to
    partition the last-level cache for different
    applications and balance the write variations by
    partition swapping. Both software-based and
    hardware-based partitioning and swapping schemes are
    proposed and evaluated for different situations. Our
    experimental results show that both our word- and
    partition-level designs can improve the lifetime of the
    non-volatile caches effectively with low performance
    and energy overheads.},
}
@Article{Trinadh:2017:ODC,
  author          = {A. Satya Trinadh and Seetal Potluri and
    Sobhan Babu Ch. and V. Kamakoti and Shiv Govind Singh},
  title           = {Optimal Don't Care Filling for Minimizing Peak Toggles
    During At-Speed Stuck-At Testing},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {5:1--5:??},
  articleno       = {5},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3084684},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Due to the increase in manufacturing/environmental
    uncertainties in the nanometer regime, testing digital
    chips under different operating conditions becomes
    mandatory. Traditionally, stuck-at tests were applied
    at slow speed to detect structural defects and
    transition fault tests were applied at-speed to detect
    delay defects. Recently, it was shown that certain
    cell-internal defects can only be detected using
    at-speed stuck-at testing. Stuck-at test patterns are
    power hungry, thereby causing excessive voltage droop
    on the power grid, delaying the test response, and
    finally leading to false delay failures on the tester.
    This motivates the need for peak power minimization
    during at-speed stuck-at testing. In this article, we
    use input toggle minimization as a means to minimize a
    circuit's power dissipation during at-speed stuck-at
    testing under the Combinational State Preservation scan
    (CSP-scan) Design-For-Testability (DFT) scheme. For
    circuits whose test sets are dominated by don't cares,
    this article maps the problem of optimal X-filling for
    peak input toggle minimization to a variant of the
    interval coloring problem and proposes a Dynamic
    Programming (DP) algorithm (DP-fill) for the same along
    with a theoretical proof for its optimality. For
    circuits whose test sets are not dominated by don't
    cares, we propose a max scatter Hamiltonian path
    algorithm, which ensures that the ordering is done such
    that the don't cares are evenly distributed in the
    final ordering of test cubes, thereby leading to better
    input toggle savings than DP-fill. The proposed
    algorithms, when experimented on ITC99 benchmarks,
    produced peak power savings of up to 48\% over the
    best-known algorithms in literature. We have also
    pruned the solutions thus obtained using Greedy and
    Simulated Annealing strategies with iterative 1-bit
    neighborhood to validate our idea of optimal input
    toggle minimization as an effective technique for
    minimizing peak power dissipation during at-speed
    stuck-at testing.},
}
@Article{Li:2017:TSL,
  author          = {Xingquan Li and Wenxing Zhu},
  title           = {Two-Stage Layout Decomposition for Hybrid E-Beam and
    Triple Patterning Lithography},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {6:1--6:??},
  articleno       = {6},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3084683},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Hybrid e-beam lithography (EBL) and triple patterning
    lithography (TPL) are advanced technologies for the
    manufacture of integrated circuits. We propose a
    technology that combines the advantages of EBL and TPL,
    which is more promising for the pattern product
    industry. Layout decomposition is a crucial step in
    this technology. In this article, we propose a
    two-stage decomposition flow for the hybrid e-beam and
    triple patterning lithography of the general layout
    decomposition (HETLD) problem. At the first stage, we
    formulate two optimization problems: the e-beam and
    stitch-aware TPL mask assignment (ESTMA) problem and
    the extended minimum weight dominating set for R$_4$
    mask assignment (MDS R$_4$ MA) problem. Binary linear
    program formulations of the two problems are solved by
    the cutting plane approach. At the second stage,
    solutions of the first stage problems are legalized to
    feasible solutions of the HETLD problem by stitch
    insertion and e-beam shot. To speed up decomposition,
    we reduce the problem size by removing some vertices
    and some minor conflict edges before decomposition.
    Experimental results show the effectiveness of our
    decomposition methods based on ESTMA and MDS R$_4$
    MA.},
}
@Article{Das:2017:VBP,
  author          = {Sourav Das and Dongjin Lee and Wonje Choi and
    Janardhan Rao Doppa and Partha Pratim Pande and
    Krishnendu Chakrabarty},
  title           = {{VFI}-Based Power Management to Enhance the Lifetime
    of High-Performance {$3$D} {NoCs}},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {7:1--7:??},
  articleno       = {7},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3092843},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {The emergence of 3D network-on-chip (NoC) has
    revolutionized the design of high-performance and
    energy-efficient manycore chips. However, the
    anticipated performance gain can be compromised due to
    the degradation and failure of vertical links (VLs).
    The Through-Silicon-Via (TSV)-enabled VLs may fail due
    to workload-induced stress; the failure of a VL can
    affect the neighboring VLs, thereby causing a cascade
    of failures and reducing the lifetime of the chip. To
    enhance the reliability of 3D NoC-enabled manycore
    chips, we propose to incorporate a voltage-frequency
    island (VFI)-based power management strategy that helps
    to reduce the energy consumption and hence, the
    workload-induced stress of the highly utilized VLs. The
    adopted power-management strategy relies on control
    decisions about the voltage/frequency (V/F) levels on
    VLs. We demonstrate that compared to the well-known
    spare TSV allocation and adaptive routing strategies,
    power management is more effective in enhancing the
    reliability of a 3D NoC. VFI-based power management
    improves the reliability of the 3D NoC by one order of
    magnitude compared to both adaptive routing and spare
    allocation while running popular SPLASH-2 and PARSEC
    benchmarks. The principal benefit of power management
    is that it is capable of reducing the operating
    temperature of the system, which in turn enhances the
    Mean-Time-To-Failure (MTTF) of the VLs and reliability
    of the overall 3D NoC.},
}
%%% Abstract: the rule-expansion bound is written as one math expression,
%%% $ (2 W - 2)^2 $, with the bit width $W$ also in math mode; the imported
%%% text split the formula between text and math.  ``encoding/conversion''
%%% is kept on one line so that no stray space follows the slash.
@Article{Murugesan:2017:NRM,
  author          = {Shanmugakumar Murugesan and Noor Mahammad Sk},
  title           = {A Novel Range Matching Architecture for Packet
    Classification Without Rule Expansion},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {8:1--8:??},
  articleno       = {8},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3105958},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {The speed requirement for the routing table lookup and
    the packet classification is rapidly increasing due to
    the increase in the number of packets needed to be
    processed per second. The hardware-based packet
    classification relies on ternary content addressable
    memory (TCAM) to meet this speed requirement. However,
    TCAM consumes huge power and also supports only for
    longest prefix match and exact match, where the
    classification rule also has a range match (RM) field.
    Hence, it is mandatory to encode the RM into prefix
    match to accommodate the rule in TCAM. In the worst
    case, one rule is encoded into $ (2 W - 2)^2 $ rules
    (where $W$ is a number of bits to represent range). This
    work proposes a novel RM architecture, and a detailed
    analysis about the range field on the standard dataset
    and the real-life classifier rules are presented. In
    the literature, the existing RM architecture is used to
    avoid the range to prefix conversion, but due to the
    serial operation, it lacks in performance. For constant
    time lookup, TCAM is the best option, but it does not
    support RM. The proposed architecture takes one clock
    cycle for RM and does not require any
    encoding/conversion. Hence, there will be a single entry for
    every rule. It is observed that just 4\% of the
    two-dimensional range rules are present in this
    dataset, and it will increase the rule set size by 4
    times in the best case and nearly 30 times in the worst
    case. The proposed RM circuit is operated in parallel
    with TCAM without compromising the speed, and this
    circuit saves huge power around 70\% and area around
    61\%, where the range to prefix conversion/encoding is
    completely avoided. The proposed architecture is well
    suited for current IPv4- and IPv6-based networks, as
    well as in software-defined networks in the near
    future.},
}
@Article{Chithira:2017:HTS,
  author          = {P. R. Chithira and Vinita Vasudevan},
  title           = {A Hierarchical Technique for Statistical Path
    Selection and Criticality Computation},
  journal         = j-TODAES,
  volume          = {23},
  number          = {1},
  pages           = {9:1--9:??},
  articleno       = {9},
  month           = oct,
  year            = {2017},
  DOI             = {https://doi.org/10.1145/3107030},
  ISSN            = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L          = {1084-4309},
  CODEN           = {ATASFO},
  fjournal        = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  bibdate         = {Mon Jan 22 09:03:33 MST 2018},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Due to process variations, every path in the circuit
    is associated with a probability of being critical and
    a measure of this probability is the criticality of the
    path. Identification of critical paths usually proceeds
    in two steps, namely, generation of a candidate path
    set followed by computation of path criticality. As
    criticality computation is expensive, the candidate
    path set is chosen using simpler metrics. However,
    these metrics are not directly related to path
    criticality and, often, the set also contains low
    criticality paths that do not need to be tested. In
    this article, we propose a hierarchical technique that
    directly gives all paths above a global criticality
    threshold. The circuit is divided into disjoint groups
    at various levels. We show that the criticality of a
    group at each level of hierarchy can be computed using
    criticality of the parent group and the local
    complementary delay within the group. Low criticality
    groups are pruned at every level, making the
    computation efficient. This recursive partitioning and
    group criticality computation is continued until the
    group criticality falls below a threshold. Beyond this,
    the path selection within the group is done using
    branch-and-bound algorithm with global criticality as
    the metric. This is possible, since our method for
    criticality computation is very efficient. Unlike other
    techniques, path selection and criticality computation
    are integrated together so that when the path selection
    is complete, path criticality is also obtained. The
    proposed algorithm is tested with ISCAS'85, ISCAS'89,
    and ITC'99 benchmark circuits and the results are
    verified using Monte Carlo simulation. The experimental
    results suggest that the proposed method gives better
    accuracy on average with around 90\% reduction in
    run-time.},
}
@Article{Moon:2017:ASP,
author = "Hyungon Moon and Jinyong Lee and Dongil Hwang and
Seonhwa Jung and Jiwon Seo and Yunheung Paek",
title = "Architectural Supports to Protect {OS} Kernels from
Code-Injection Attacks and Their Applications",
journal = j-TODAES,
volume = "23",
number = "1",
pages = "10:1--10:??",
month = oct,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3110223",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:33 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The kernel code injection is a common behavior of
kernel-compromising attacks where the attackers aim to
gain their goals by manipulating an OS kernel. Several
security mechanisms have been proposed to mitigate such
threats, but they all suffer from non-negligible
performance overhead. This article introduces a
hardware reference monitor, called Kargos, which can
detect the kernel code injection attacks with nearly
zero performance cost. Kargos monitors the behaviors of
an OS kernel from outside the CPU through the standard
bus interconnect and debug interface available with
most major microprocessors. By watching the execution
traces and memory access events in the monitored target
system, Kargos uncovers attempts to execute malicious
code with the kernel privilege. On top of this, we also
applied the architectural supports for Kargos to the
detection of ROP attacks. KS-Stack is the hardware
component that builds and maintains the shadow stacks
using the existing supports to detect this ROP attacks.
According to our experiments, Kargos detected all the
kernel code injection attacks that we tested, yet just
increasing the computational loads on the target CPU by
less than 1\% on average. The performance overhead of
the KS-Stack was also less than 1\%.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yang:2017:ELD,
author = "Yunfeng Yang and Wai-Shing Luk and Hai Zhou and David
Z. Pan and Dian Zhou and Changhao Yan and Xuan Zeng",
title = "An Effective Layout Decomposition Method for {DSA}
with Multiple Patterning in Contact-Hole Generation",
journal = j-TODAES,
volume = "23",
number = "1",
pages = "11:1--11:??",
month = oct,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3131847",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:33 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Directed self-assembly (DSA) complemented with
multiple patterning (MP) is an attractive next
generation lithography (NGL) technique for contact-hole
generation. Nevertheless, a high-quality DSA-aware
layout decomposer is required to enable the technology.
In this article, we introduce an efficient method which
incorporates a set packing for generating DSA template
candidates and a local search method. Besides, a
multi-start strategy is integrated into the framework
to prevent the local minima. Our framework encourages
the reuse of existing coloring solvers. Hence, the
development cost can significantly be reduced. In
addition, for DSA multiple patterning where the number
of masks is larger than two, we present an efficient
iterative partition based method. Experimental results
show that compared with the state-of-the-art work, our
methods can achieve roughly 100$ \times $ speedup for
double patterning, and 78.8\% conflict reduction with
5$ \times $ speedup for triple patterning on the dense
graphs.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2017:AMM,
author = "Chao Chen and Giovanni Beltrame",
title = "An Adaptive {Markov} Model for the Timing Analysis of
Probabilistic Caches",
journal = j-TODAES,
volume = "23",
number = "1",
pages = "12:1--12:??",
month = oct,
year = "2017",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3123877",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 22 09:03:33 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Accurate timing prediction for real-time embedded
software execution is becoming a problem due to the
increasing complexity of computer architecture, and the
presence of mixed-criticality workloads. Probabilistic
caches were proposed to set bounds to Worst Case
Execution Time (WCET) estimates and help designers
improve real-time embedded system resource use. Static
Probabilistic Timing Analysis (SPTA) for probabilistic
caches is nevertheless difficult to perform, because
cache accesses depend on execution history, and the
computational complexity of SPTA makes it intractable
for calculation as the number of accesses increases. In
this paper, we explore and improve SPTA for caches with
evict-on-miss random replacement policy using a state
space modeling technique. A nonhomogeneous Markov model
is employed for single-path programs in discrete-time
finite state space representation. To make this Markov
model tractable, we limit the number of states and use
an adaptive method for state modification. Experiments
show that compared to the state-of-the-art methodology,
the proposed adaptive Markov chain approach provides
better results at the occurrence probability of
10$^{-15}$: in terms of accuracy, the state-of-the-art
SPTA results are more conservative, by 11\% more on
average. In terms of computation time, our approach is
not significantly different from the state-of-the-art
SPTA.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kritikakou:2018:DDS,
author = "Angeliki Kritikakou and Thibaut Marty and Matthieu
Roy",
title = "{DYNASCORE}: {DYNAmic Software COntroller to Increase
REsource} Utilization in Mixed-Critical Systems",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "13:1--13:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3110222",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In real-time mixed-critical systems, Worst-Case
Execution Time (WCET) analysis is required to guarantee
that timing constraints are respected---at least for
high-criticality tasks. However, the WCET is
pessimistic compared to the real execution time,
especially for multicore platforms. As WCET computation
considers the worst-case scenario, it means that
whenever a high-criticality task accesses a shared
resource in multicore platforms, it is considered that
all cores use the same resource concurrently. This
pessimism in WCET computation leads to a dramatic
underutilization of the platform resources, or even
failing to meet the timing constraints. In order to
increase resource utilization while guaranteeing
real-time guarantees for high-criticality tasks,
previous works proposed a runtime control system to
monitor and decide when the interferences from
low-criticality tasks cannot be further tolerated.
However, in the initial approaches, the points where
the controller is executed were statically predefined.
In this work, we propose a dynamic runtime control
which adapts its observations to online temporal
properties, further increasing the dynamism of the
approach, and mitigating the unnecessary overhead
implied by existing static approaches. Our dynamic
adaptive approach allows one to control the ongoing
execution of tasks based on runtime information, and
further increases the gains in terms of resource
utilization compared with static approaches.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Boukhobza:2018:ENS,
author = "Jalil Boukhobza and St{\'e}phane Rubini and Renhai
Chen and Zili Shao",
title = "Emerging {NVM}: a Survey on Architectural Integration
and Research Challenges",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "14:1--14:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3131848",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "There has been a surge of interest in Non-Volatile
Memory (NVM) in recent years. With many advantages,
such as density and power consumption, NVM is carving
out a place in the memory hierarchy and may eventually
change our view of computer architecture. Many NVMs
have emerged, such as Magnetoresistive random access
memory (MRAM), Phase Change random access memory (PCM),
Resistive random access memory (ReRAM), and
Ferroelectric random access memory (FeRAM), each with
its own peculiar properties and specific challenges.
The scientific community has carried out a substantial
amount of work on integrating those technologies in the
memory hierarchy. As many companies are announcing the
imminent mass production of NVMs, we think that it is
time to have a step back and discuss the body of
literature related to NVM integration. This article
surveys state-of-the-art work on integrating NVM into
the memory hierarchy. Specially, we introduce the four
types of NVM, namely, MRAM, PCM, ReRAM, and FeRAM, and
investigate different ways of integrating them into the
memory hierarchy from the horizontal or vertical
perspectives. Here, horizontal integration means that
the new memory is placed at the same level as an
existing one, while vertical integration means that the
new memory is interleaved between two existing levels.
In addition, we describe challenges and opportunities
with each NVM technique.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gao:2018:ECI,
author = "Congming Gao and Liang Shi and Yejia Di and Qiao Li
and Chun Jason Xue and Kaijie Wu and Edwin Sha",
title = "Exploiting Chip Idleness for Minimizing Garbage
Collection-Induced Chip Access Conflict on {SSDs}",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "15:1--15:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3131850",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Solid state drives (SSDs) are normally constructed
with a number of parallel-accessible flash chips, where
host I/O requests are processed in parallel. In
addition, there are many internal activities in SSDs,
such as garbage collection and wear leveling induced
read, write, and erase operations, to solve the issues
of inability of in-place updates and limited lifetime.
When internal activities are triggered on a chip, the
chip will be blocked. Our preliminary studies on
several workloads show that when internal activities
are frequently triggered, the host I/O performance will
be significantly impacted because of the access
conflict between them. In this work, in order to
improve the access conflict induced performance
degradation, a novel access conflict minimization
scheme is proposed. The basic idea of the scheme is
motivated by an interesting observation in SSDs:
several chips are idle when other chips are busy with
internal activities and host I/O requests. Based on
this observation, we propose to schedule internal
activities induced operations for minimized access
conflict by exploiting the idleness of the multiple
chips of SSDs. This approach is realized by two steps:
First, read internal activities accessed data to the
controller; second, by exploiting the idle chips during
internal activities, write internal activities accessed
data back to these idle chips. With this scheme, the
internal activities can be processed with minimized
access conflict to the host requests. Simulation
results show that the proposed approach significantly
reduces the access conflict, and in turn leads to a
significant performance improvement of SSDs.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jun:2018:RBD,
author = "Jaeyung Jun and Kyu Hyun Choi and Hokwon Kim and Sang
Ho Yu and Seon Wook Kim and Youngsun Han",
title = "Recovering from Biased Distribution of Faulty Cells in
Memory by Reorganizing Replacement Regions through
Universal Hashing",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "16:1--16:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3131241",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Recently, scaling down dynamic random access memory
(DRAM) has become more of a challenge, with more faults
than before and a significant degradation in yield. To
improve the yield in DRAM, a redundancy repair
technique with intra-subarray replacement has been
extensively employed to replace faulty elements (i.e.,
rows or columns with defective cells) with spare
elements in each subarray. Unfortunately, such
technique cannot efficiently handle a biased
distribution of faulty cells because each subarray has
a fixed number of spare elements. In this article, we
propose a novel redundancy repair technique that uses a
hashing method to solve this problem. Our hashing
technique reorganizes replacement regions by changing
the way in which their replacement information is
referred, thus making faulty cells become evenly
distributed to the regions. We also propose a fast
repair algorithm to find the best hash function among
all possible candidates. Even if our approach requires
little hardware overhead, it significantly improves the
yield when compared with conventional redundancy
techniques. In particular, the results of our
experiment show that our technique saves spare elements
by about 57\% and 55\% for a yield of 99\% at BER 1e-6
and 5e-7, respectively.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhou:2018:RRD,
author = "Hongxia Zhou and Chiu-Wing Sham and Hailong Yao",
title = "Revisiting Routability-Driven Placement for Analog and
Mixed-Signal Circuits",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "17:1--17:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3131849",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The exponential increase in scale and complexity of
very large-scale integrated circuits (VLSIs) poses a
great challenge to current electronic design automation
(EDA) techniques. As an essential step in the whole EDA
layout synthesis, placement is attracting more and more
attention, especially for analog and mixed-signal
integrated circuits. Recently, experts in this field
have observed a variety of analog-specific layout
constraints to obtain high-performance placement
solutions. These constraints include symmetry,
alignment, boundary, preplace, abutment, range and
maximum separation, and routability of the placement
solutions. In this article, the effectiveness of
slicing and nonslicing representation is investigated.
Additionally, the technique of congestion-based virtual
sizing is proposed. Experimental results show that the
routability can be improved significantly by applying
congestion-based virtual sizing. Results also show that
the slicing representation can improve the regularity
of the placement solutions and hence improve the
routability with higher efficiency compared to the
nonslicing representation.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2018:ACS,
author = "Shao-Chung Wang and Li-Chen Kan and Chao-Lin Lee and
Yuan-Shin Hwang and Jenq-Kuen Lee",
title = "Architecture and Compiler Support for {GPUs} Using
Energy-Efficient Affine Register Files",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "18:1--18:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3133218",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "A modern GPU can simultaneously process thousands of
hardware threads. These threads are grouped into
fixed-size SIMD batches executing the same instruction
on vectors of data in a lockstep to achieve high
throughput and performance. The register files are huge
due to each SIMD group accessing a dedicated set of
vector registers for fast context switching, and
consequently the power consumption of register files
has become an important issue. One proposed solution is
to replace some of the vector registers by scalar
registers, as different threads in a same SIMD group
operate on scalar values and so the redundant
computations and accesses of these scalar values can be
eliminated. However, it has been observed that a
significant number of registers containing affine
vectors $ \upsilon $ such that $ \upsilon [i] = b + i
\times s $ can be represented by base $b$ and stride
$s$. Therefore, this article proposes an affine
register file design for GPUs that is energy efficient
due to it reducing the redundant executions of both the
uniform and affine vectors. This design uses a pair of
registers to store the base and stride of each affine
vector and provides specific affine ALUs to execute
affine instructions. A method of compiler analysis has
been developed to detect scalars and affine vectors and
annotate instructions for facilitating their
corresponding scalar and affine computations.
Furthermore, a priority-based register allocation
scheme has been implemented to assign scalars and
affine vectors to appropriate scalar and affine
register files. Experimental results show that this
design was able to dispatch 43.56\% of the computations
to scalar and affine ALUs when using eight scalar and
four affine registers per warp. This resulted in the
current design also reducing the energy consumption of
the register files and ALUs to 21.86\% and 26.54\%,
respectively, and it reduced the overall energy
consumption of the GPU by an average of 5.18\%.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pereira-Santos:2018:RFB,
author = "Leonardo Pereira-Santos and Gabriel Luca Nazar and
Luigi Carro",
title = "Repair of {FPGA}-Based Real-Time Systems With Variable
Slacks",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "19:1--19:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3144533",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Field-programmable gate arrays (FPGAs) based on SRAM
cells are an attractive alternative for real-time
system designers, as they offer high density, low cost,
and high performance. The use of SRAM cells in the
FPGA's configuration memory, while enabling these
desirable characteristics, also creates a reliability
hazard as RAM cells are susceptible to single-event
upsets (SEUs). The usual approach is the use of double
or triple redundancy allied with a correction
mechanism, such as periodic scrubbing. Although
scrubbing is an effective technique to remove
SEU-induced errors, the repair of real-time systems
presents specific challenges, such as avoiding failures
by missing real-time deadlines. In this article, a
novel approach is proposed to use a deadline-aware
scrubbing scheme with negligible area costs that
dynamically chooses the scrubbing starting position.
Such a scheme allows us to avoid missing real-time
deadlines while maximizing the repair probability given
a bounded repair time. Our approach reduces the failure
rate, considering the probability of missing deadlines
due to faults, by 33.39\% on average, with an average
area cost of 1.23\%.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2018:CMD,
author = "Chen-Hsuan Lin and Lu Wan and Deming Chen",
title = "{C-Mine}: Data Mining of Logic Common Cases for
Improved Timing Error Resilience with Energy
Efficiency",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "20:1--20:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3144534",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The better-than-worst-case (BTW) design methodology
can achieve higher circuit energy efficiency,
performance, or reliability by allowing timing errors
for rare cases and rectifying them with error
correction mechanisms. Therefore, the performance of
BTW design heavily depends on the correctness of common
cases, which are frequent input patterns in a workload.
However, most existing methods do not provide
sufficiently scalable solutions and also overlook the
whole picture of the design. Thus, we propose a new
technique, common-case mining method (C-Mine), which
combines two scalable techniques, data mining and
Boolean satisfiability (SAT) solving, to overcome these
limitations. Data mining can efficiently extract
patterns from an enormous dataset, and SAT solving is
famous for its scalable verification. In this article,
we present two versions of C-Mine, C-Mine-DCT and
C-Mine-APR, which aim at faster runtime and better
energy saving, respectively. The experimental results
show that, compared to a recent publication, C-Mine-DCT
can achieve compatible performance with an additional
8\% energy savings and 54$ \times $ speedup for bigger
benchmarks on average. Furthermore, C-Mine-APR can
achieve up to 13\% more energy saving than C-Mine-DCT
while confronting designs with more common cases.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Rosvall:2018:FTA,
author = "Kathrin Rosvall and Ingo Sander",
title = "Flexible and Tradeoff-Aware Constraint-Based Design
Space Exploration for Streaming Applications on
Heterogeneous Platforms",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "21:1--21:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3133210",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Due to its complexity, the problem of mapping and
scheduling streaming applications on heterogeneous
MPSoCs under real-time and performance constraints has
traditionally been tackled by incomplete heuristic
algorithms. In recent years, approaches based on
Constraint Programming (CP) have shown promising
results as complete methods for finding optimal
mappings, in particular concerning throughput. However,
so far none of the available CP approaches consider the
tradeoff between throughput and buffer requirements or
throughput and power consumption. This article
integrates tradeoff awareness into the CP model and
introduces a two-step solving approach that utilizes
the advantages of heuristics, while still keeping the
completeness property of CP. With a number of
experiments considering several streaming applications
and different platform models, the article illustrates
not only the efficiency of the presented model but also
its suitability for solving different problems with
various combinations of performance constraints.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Knechtel:2018:MOF,
author = "Johann Knechtel and Jens Lienig and Ibrahim (Abe) M.
Elfadel",
title = "Multi-Objective {$3$D} Floorplanning with Integrated
Voltage Assignment",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "22:1--22:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3149817",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Voltage assignment is a well-known technique for
circuit design, which has been applied successfully to
reduce power consumption in classical 2D integrated
circuits (ICs). Its usage in the context of 3D ICs has
not been fully explored yet although reducing power in
3D designs is of crucial importance, for example, to
tackle the ever-present challenge of thermal
management. In this article, we investigate the
effective and efficient partitioning of 3D designs into
multiple voltage domains during the floorplanning step
of physical design. In particular, we introduce,
implement, and evaluate novel algorithms for effective
integration of voltage assignment into the inner
floorplanning loops. Our algorithms are compatible not
only with the traditional objectives of 2D
floorplanning but also with the additional objectives
and constraints of 3D designs, including the planning
of through-silicon vias (TSVs) and the thermal
management of stacked dies. We test our 3D floorplanner
extensively on the GSRC benchmarks as well as on an
augmented version of the IBM-HB+ benchmarks. The 3D
floorplans are shown to achieve effective trade-offs
for power and delays throughout different
configurations---our results surpass na{\"\i}ve low-power
and high-performance voltage assignment by 17\% and
10\%, on average. Finally, we release our 3D
floorplanning framework as open-source code.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yang:2018:HEP,
author = "Kun Yang and Haoting Shen and Domenic Forte and Swarup
Bhunia and Mark Tehranipoor",
title = "Hardware-Enabled Pharmaceutical Supply Chain
Security",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "23:1--23:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3144532",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The pharmaceutical supply chain is the pathway through
which prescription and over-the-counter (OTC) drugs are
delivered from manufacturing sites to patients.
Technological innovations, price fluctuations of raw
materials, as well as tax, regulatory, and market
demands are driving change and making the
pharmaceutical supply chain more complex. Traditional
supply chain management methods struggle to protect the
pharmaceutical supply chain, maintain its integrity,
enhance customer confidence, and aid regulators in
tracking medicines. To develop effective measures that
secure the pharmaceutical supply chain, it is important
that the community is aware of the state-of-the-art
capabilities available to the supply chain owners and
participants. In this article, we will be presenting a
survey of existing hardware-enabled pharmaceutical
supply chain security schemes and their limitations. We
also highlight the current challenges and point out
future research directions. This survey should be of
interest to government agencies, pharmaceutical
companies, hospitals and pharmacies, and all others
involved in the provenance and authenticity of
medicines and the integrity of the pharmaceutical
supply chain.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Noltsis:2018:RSC,
author = "Michail Noltsis and Dimitrios Rodopoulos and Nikolaos
Zompakis and Francky Catthoor and Dimitrios Soudris",
title = "Runtime Slack Creation for Processor Performance
Variability using System Scenarios",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "24:1--24:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3152158",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Modern microprocessors contain a variety of mechanisms
used to mitigate errors in the logic and memory,
referred to as Reliability, Availability, and
Serviceability (RAS) techniques. Many of these
techniques, such as component disabling, come at a
performance cost. With the aggressive downscaling of
device dimensions, it is reasonable to expect that
chip-wide error rates will intensify in the future and
perhaps vary throughout system lifetime. As a result,
it is important to reclaim the temporal RAS overheads
in a systematic way and enable dependable performance.
The current article presents a closed-loop control
scheme that actuates processor's frequency based on
detected timing interference to ensure performance
dependability. The concepts of slack and deadline
vulnerability factor are introduced to support the
formulation of a discrete time control problem. Default
application timing is derived using the system scenario
methodology, the applicability of which is demonstrated
through simulations. Additionally, the proposed concept
is demonstrated on a real platform and application: a
Proportional-Integral-Differential controller,
implemented within the application, actuates the
Dynamic Voltage and Frequency Scaling (DVFS) framework
of the Linux kernel to effectively reclaim temporal
overheads injected at runtime. The current article
discusses the responsiveness and energy efficiency of
the proposed performance dependability scheme. Finally,
additional formulation is introduced to predict the
upper bound of timing interference that can be absorbed
by actuating the DVFS of any processor and is also
validated on a representative reduction to practice.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Shafiee:2018:DFB,
author = "M. Shafiee and N. Beohar and P. Bakliwal and S. Roy
and D. Mandal and B. Bakkaloglu and S. Ozev",
title = "A Disturbance-Free Built-In Self-Test and Diagnosis
Technique for {DC--DC} Converters",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "25:1--25:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3152157",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Complex electronic systems include multiple power
domains and drastically varying dynamic power
consumption patterns, requiring the use of multiple
power conversion and regulation units. High-frequency
switching converters have been gaining prominence in
the DC-DC converter market due to their high efficiency
and smaller form factor. Unfortunately, they are also
subject to higher process variations, and faster
in-field degradation, jeopardizing stable operation of
the power supply. This article presents a technique to
track changes in the dynamic loop characteristics of
DC-DC converters without disturbing the normal mode of
operation using a white noise-based excitation and
correlation. Using multiple points for injection and
analysis, we show that the degraded part can be
diagnosed to take remedial action. White noise
excitation is generated via a pseudo-random disturbance
at reference, load current, and pulse-width modulation
(PWM) nodes of the converter with the test signal
energy being spread over a wide bandwidth, without
significantly affecting the converter noise and ripple
floor. The impulse response is extracted by correlating
the random input sequence with the disturbed output
generated. Test signal analysis is achieved by
correlating the pseudo-random input sequence with the
output response and thereby accumulating the desired
behavior over time and pulling it above the noise floor
of the measurement set-up. An off-the-shelf power
converter, LM27402, is used as the device-under-test
(DUT) for experimental verification. Experimental
results show that the proposed technique can estimate
converter natural frequency and quality factor
($Q$-factor) within $ \pm 2.5$ \% and $ \pm 0.7$ \%
error margin respectively, over changes in load
inductance and capacitance. For the diagnosis purpose,
a measure of inductor's DC resistance (DCR) value,
which is the inductor's series resistance and
indicative of the degradation in inductor's $Q$-factor,
is estimated within less than $ \pm 1.6$ \% error
margin.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Emeretlis:2018:SMA,
author = "Andreas Emeretlis and George Theodoridis and
Panayiotis Alefragis and Nikolaos Voros",
title = "Static Mapping of Applications on Heterogeneous
Multi-Core Platforms Combining Logic-Based {Benders}
Decomposition with Integer Linear Programming",
journal = j-TODAES,
volume = "23",
number = "2",
pages = "26:1--26:??",
month = jan,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3133219",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The proper mapping of an application on a multi-core
platform and the scheduling of its tasks are key
elements to achieve the maximum performance. In this
article, a novel hybrid approach based on integrating
the Logic-Based Benders Decomposition (LBBD) principle
with a pure Integer Linear Programming (ILP) model is
introduced for mapping applications described by
Directed Acyclic Graphs (DAGs) on platforms consisting
of heterogeneous cores. The LBBD approach combines two
optimization techniques with complementary strengths,
namely ILP and Constraint Programming (CP), and is
employed as a cut generation scheme. The generated
constraints are utilized by the ILP model to cut
possible assignment combinations aiming at improving
the solution or proving the optimality of the
best-found one. The introduced approach was applied
both on synthetic DAGs and on DAGs derived from real
applications. Through the proposed approach, many
problems were optimally solved that could not be solved
by any of the above methods (ILP, LBBD) alone within a
time limit of 2 hours, while the overall solution time
was also significantly decreased. Specifically, the
hybrid method exhibited speedups equal to $ 4.2 \times
$ for the synthetic instances and $ 10 \times $ for the
real-application DAGs over the LBBD approach and two
orders of magnitude over the ILP model.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gomez:2018:SCP,
author = "Andres F. Gomez and Victor Champac",
title = "Selection of Critical Paths for Reliable Frequency
Scaling under {BTI}-Aging Considering Workload
Uncertainty and Process Variations Effects",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "27:1--27:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3177864",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Conventional clock guardbanding to assure a circuit's
reliable operation under device aging due to NBTI/PBTI
and process variations introduces significant
performance loss in modern nanometer circuits. Dynamic
Frequency Scaling (DFS) is a more efficient technique
that allows us to adjust the system clock frequency
according to the process condition and aging
deterioration of the circuit. At the design phase, the
DFS technique requires the identification of the logic
paths to be monitored to introduce the required
circuitry to monitor their delay. However, critical
path identification is a complex problem due to three
major challenges: (1) The critical paths of the circuit
depend on the stress duty cycle of the devices, which
are unknown in advance at design phase; (2) the
critical paths of the circuit depend on the process
parameters variations, whose impact on delay depends on
the spatial correlation due to proximity at the circuit
layout; and (3) the critical paths reordering
probability may change over time due to aging. This
article presents a methodology for efficient selection
of the critical paths to be monitored under a DFS
framework, addressing the aforementioned challenges.
Experimental results on ISCAS 85/89 benchmark circuits
show the feasibility of the proposed approach to select
a restricted path set while providing reliable aging
monitoring.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2018:PSC,
author = "Sheng-Min Huang and Li-Pin Chang",
title = "Providing {SLO} Compliance on {NVMe SSDs} Through
Parallelism Reservation",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "28:1--28:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3174867",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Non-Volatile Memory Express (NVMe) is a specification
for next-generation solid-state disks (SSDs). Benefited
from the massive internal parallelism and the
high-speed PCIe bus, NVMe SSDs achieve extremely high
data transfer rates, and they are an ideal solution of
shared storage in virtualization environments.
Providing virtual machines with Service Level Objective
(SLO) compliance on NVMe SSDs is a challenging task,
because garbage collection activities inside of NVMe
SSDs globally affect the I/O performance of all virtual
machines. In this study, we introduce a novel approach,
called parallelism reservation, which is inspired by
the rich internal parallelism of NVMe SSDs. The degree
of parallelism stands for how many flash chips are
concurrently active. Our basic idea is to reserve
sufficient degrees of parallelism for read, write, and
garbage collection operations, making sure that an NVMe
SSD delivers stable read and write throughput and
reclaims free space at a constant rate. The stable read
and write throughput are proportionally distributed
among virtual machines for SLO compliance. Our
experimental results show that our parallelism
reservation approach delivered satisfiable throughput
and highly predictable response to virtual machines.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yang:2018:RRE,
author = "Kun Yang and Domenic Forte and Mark Tehranipoor",
title = "{ReSC}: an {RFID-Enabled} Solution for Defending {IoT}
Supply Chain",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "29:1--29:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3174850",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The Internet of Things (IoT), an emerging global
network of uniquely identifiable embedded computing
devices within the existing Internet infrastructure, is
transforming how we live and work by increasing the
connectedness of people and things on a scale that was
once unimaginable. In addition to facilitated
information and service exchange between connected
objects, enhanced computing power and analytic
capabilities of individual objects, and increased
interaction between objects and their environments, the
IoT also raises new security and privacy challenges.
Hardware trust across the IoT supply chain is the
foundation of IoT security and privacy. Two major
supply chain issues --- disappearance/theft of
authentic IoT devices and appearance of unauthentic
ones --- have to be addressed to secure the IoT supply
chain and lay the foundation for further security and
privacy-defensive measures. Comprehensive solutions
that enable IoT device authentication and traceability
across the entire supply chain (i.e., during
distribution and after being provisioned) need to be
established. Existing hardware, software, and network
protection methods, however, do not address IoT supply
chain issues. To mitigate this shortcoming, we propose
an RFID-enabled solution called ReSC that aims at
defending the IoT supply chain. By incorporating three
techniques --- one-to-one mapping between RFID tag identity
and control chip identity; unique tag trace, which
records tag provenance and history information; and
neighborhood attestation of IoT devices --- ReSC is
resistant to split attacks (i.e., separating tag from
product, swapping tags), counterfeit injection, product
theft throughout the entire supply chain, device
recycling, and illegal network service access (e.g.,
Internet, cable TV, online games, remote firmware
updates). Simulations, theoretical analysis, and
experimental results based on a printed circuit board
(PCB) prototype demonstrate the effectiveness of ReSC.
Finally, we evaluate the security of our proposed
scheme against various attacks.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2018:LBF,
author = "Dongwook Lee and Andreas Gerstlauer",
title = "Learning-Based, Fine-Grain Power Modeling of
System-Level Hardware {IPs}",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "30:1--30:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3177865",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Accurate power and performance models are needed to
enable rapid, early system-level analysis and
optimization. There is, however, a lack of fast yet
fine-grain power models of hardware components at such
high levels of abstraction. In this article, we present
novel learning-based approaches for extending fast
functional simulation models of accelerators and other
hardware intellectual property components (IPs) with
accurate cycle-, block-, and invocation-level power
estimates. Our proposed power modeling approach is
based on annotating functional hardware descriptions
with capabilities that, depending on observability,
allow capturing data-dependent resource, block, or
input and output (I/O) activity without a significant
loss in simulation speed. We further leverage advanced
machine learning techniques to synthesize abstract
power models using novel decomposition techniques that
reduce model complexities and increase estimation
accuracy. Results of applying our approach to various
industrial-strength design examples show that our power
models can predict cycle-, basic block-, and
invocation-level power consumption to within 10\%, 9\%,
and 3\% of a commercial gate-level power estimation
tool, respectively, all while running at several orders
of magnitude faster speeds of 1--10 Mcycles/sec. Model
training and synthesis takes less than 34 minutes in
all cases, including up to 30 minutes for training data
and trace generation using gate-level simulations.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Naderan-Tahan:2018:DCE,
author = "Mahmood Naderan-Tahan and Hamid Sarbazi-Azad",
title = "{Domino Cache}: an Energy-Efficient Data Cache for
Modern Applications",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "31:1--31:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3174848",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The energy consumption for processing modern workloads
is challenging in data centers. Due to the large
datasets of cloud workloads, the miss rate of the L1
data cache is high, and with respect to the energy
efficiency concerns, such misses are costly for memory
instructions because lower levels of memory hierarchy
consume more energy per access than the L1. Moreover,
large last-level caches are not performance effective,
in contrast to traditional scientific workloads. The
aim of this article is to propose a large L1 data
cache, called Domino, to reduce the number of accesses
to lower levels in order to improve the energy
efficiency. In designing Domino, we focus on two
components that use the on-chip area and are not energy
efficient, which makes them good candidates to use
their area for enlarging the L1 data cache. Domino is a
highly associative cache that extends the conventional
cache by borrowing the prefetcher and last-level-cache
storage budget and using it as additional ways for data
cache. In Domino, the additional ways are separated
from the conventional cache ways; hence, the critical
path of the first access is not altered. On a miss in
the conventional part, it searches the added ways in a
mix of parallel-sequential fashion to compromise the
latency and energy consumption. Results on the
Cloudsuite benchmark suite show that read and write
misses are reduced by 30\%, along with a 28\% reduction
in snoop messages. The overall energy consumption per
access is then reduced by 20\% on average (maximum
38\%) as a result of filtering accesses to the lower
levels.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Abolmaali:2018:EFP,
author = "Sheis Abolmaali and Mehdi Kamal and Ali Afzali-Kusha
and Massoud Pedram",
title = "An Efficient False Path-Aware Heuristic Critical Path
Selection Method with High Coverage of the Process
Variation Space",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "32:1--32:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3177866",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we present a critical path selection
method that efficiently finds true (sensitizable)
critical paths of a circuit in the presence of process
variations. The method, which is based on the viability
analysis, tries to select the least number of true
critical paths that cover all of circuit critical
gates. Critical gates are those that make a path
critical with a probability higher than a predefined
threshold value. Selecting fewer critical paths leads
to less computation time for the algorithm and shorter
test time of fabricated chips. For this purpose, an
efficient Statistical Static Timing Analysis- (SSTA)
based technique is suggested. This technique tries to
find circuit-critical gates whose process parameter
variations cover a major part of the process space.
Improving the process space coverage using fewer paths
is achieved by considering both spatial (proximity of
gates) and structural (having common gates)
correlations in the analysis of choosing the critical
paths. In the selection process, paths with low
similarities in their characteristics are preferred. In
addition, only true paths whose delays affect the
maximum delay of the circuit are included. The selected
paths can be used in the test process of the fabricated
chips to determine if the chip meets its timing
requirements. Also, a modified viability analysis that
incorporates statistical computations is used in the
SSTA. The efficacy of the proposed method is evaluated
by comparing its results for combinational and
sequential ISCAS benchmarks with those obtained by
exhaustive search. Results indicate although, on
average, only 4.38\% of all the critical paths found by
the exhaustive search are selected by the proposed
method, the maximum probability of criticality for the
paths that are not considered in our method is, on
average, less than 4\%.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jalili:2018:ERM,
author = "Majid Jalili and Hamid Sarbazi-Azad",
title = "Express Read in {MLC} Phase Change Memories",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "33:1--33:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3177876",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In the era of big data, the capability of computer
systems must be enhanced to support 2.5 quintillion
byte/day data delivery. Among the components of a
computer system, main memory has a great impact on
overall system performance. DRAM technology has been
used over the past four decades to build main memories.
However, the scalability of DRAM technology has faced
serious challenges. To keep pace with the
ever-increasing demand for larger main memory, some new
alternative technologies have been introduced. Phase
change memory (PCM) is considered as one of such
technologies for substituting DRAM. PCM offers some
noteworthy properties such as low static power
consumption, nonvolatility, and capability of storing
more than one bit per cell (multilevel cell, or MLC).
However, the short lifetime and long access latency of
PCM (specifically MLC PCM) require feasible and
efficient solutions. In this article, based on the
observation that applications access a significant
number of read-friendly data blocks, we propose Express
Read to prevent the MLC PCM read circuit to spend
unnecessary time sensing the cells of a memory block. A
read-friendly data block (RFDB) is composed of only
``11'' and ``00'' bit pairs, and thus upon sensing the
most significant bit of a cell, the read operation can
be early terminated to reduce the MLC read time and
power consumption. Moreover, we increase the number of
RFDBs using two simple techniques to better exploit the
benefits of Express Read. Results obtained from
full-system simulation show near 6\% performance improvement
and 21\% energy gain, on average, over the baseline
system.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yan:2018:DCR,
author = "Jin-Tai Yan",
title = "Direction-Constrained Rectangle Escape Routing",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "34:1--34:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3178047",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Given a set of buses with available escape directions
inside a chip, a two-phase algorithm is proposed to
assign one feasible escape direction onto any bus such
that the number of used layers is minimized and to
allocate the pin rectangle and the projection rectangle
of any escape bus onto the minimized layers in
direction-constrained rectangle escape routing. In our
proposed algorithm, based on the concept of
two-dimensional maximum density inside a chip, the
escape directions of the buses can be first assigned to
minimize the number of the used layers by iteratively
eliminating unnecessary escape directions for any bus
inside a chip. Furthermore, based on the construction
of the represented intervals and the assignment
constraints for the escape buses, a modified left-edge
algorithm can be used to allocate all the escape buses
onto the minimized layers. Compared with Ma's integer
linear program (ILP)-based algorithm [10] using
lp\_solve and Gurobi in rectangle escape routing, the
experimental results show that our proposed algorithm
obtains the same results but reduces CPU time by 94.2\%
and 35.7\% when using lp\_solve and Gurobi for 16
tested examples with no direction constraint on
average, respectively. Compared with the modified
algorithm from Ma's ILP-based algorithm [10] using
lp\_solve and Gurobi in direction-constrained rectangle
escape routing, the experimental results show that our
proposed algorithm obtains the same results but reduces
CPU time by 94.3\% and 37.7\% when using lp\_solve and
Gurobi for 16 tested examples with direction
constraints on average, respectively. Besides that,
compared with Yan's iterative algorithm, the
experimental results show that our proposed algorithm
increases CPU time by 1.0\% to reduce the number of
used layers by 11.1\% for 16 tested examples on average.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2018:MTI,
author = "Shengcheng Wang and Ran Wang and Krishnendu
Chakrabarty and Mehdi B. Tahoori",
title = "Multicast Testing of Interposer-Based {$ 2.5 $D}
{ICs}: Test-Architecture Design and Test Scheduling",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "35:1--35:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3177879",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Interposer-based 2.5D integrated circuits (ICs) are
seen today as a precursor to 3D ICs based on
through-silicon vias (TSVs). All the dies in a 2.5D IC
must be adequately tested for product qualification.
However, due to the limited number of package pins, it
is a major challenge to test 2.5D ICs using
conventional methods. Moreover, due to higher
integration levels, test-application time and test
power consumption for 2.5D ICs are also increased
compared to their 2D counterparts. Therefore, it is
imperative to take these issues into account during
2.5D IC testing. In this article, we present an
efficient multicast test architecture for targeting
defects in dies, in which multiple dies can be tested
simultaneously to reduce the test-application time
under constraints on test power and fault coverage. We
also propose a test scheduling and optimization
technique that can be utilized with the multicast test
architecture. By considering the trade-off between
test-application time, test-power budget, and test
quality, the proposed technique provides test schedules
with minimum test-application time under constraints on
power consumption and fault coverage. Compared to
previous work, the proposed technique can reduce
test-application time by up to 53.4\% for benchmark
designs while achieving higher fault coverage. Since
the loss in fault coverage due to multicast testing is
extremely small, we can use top-off patterns to achieve
full fault coverage for the dies at negligible
additional cost.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhai:2018:ENG,
author = "Jinyuan Zhai and Changhao Yan and Sheng-Guo Wang and
Dian Zhou and Hai Zhou and Xuan Zeng",
title = "An Efficient Non-{Gaussian} Sampling Method for High
Sigma {SRAM} Yield Analysis",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "36:1--36:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3174866",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Yield analysis of SRAM is a challenging issue,
because the failure rates of SRAM cells are extremely
small. In this article, an efficient non-Gaussian
sampling method of cross entropy optimization is
proposed for estimating the high sigma SRAM yield.
Instead of sampling with the Gaussian distribution in
existing methods, a non-Gaussian distribution, i.e., a
joint one-dimensional generalized Pareto distribution
and $ (n - 1) $-dimensional Gaussian distribution, is taken
as the function family of practical distribution, which
is proved to be more suitable to fit the ideal
distribution in the view of extreme failure event. To
minimize the cross entropy between practical and ideal
distributions, a sequential quadratic programming
solver with multiple starting points strategy is
applied for calculating the optimal parameters of
practical distributions. Experimental results show that
the proposed non-Gaussian sampling is a $ 2.2$--$ 4.1
\times $ speedup over the Gaussian sampling; on the
whole, it is about a $ 1.6$--$ 2.3 \times $ speedup
over state-of-the-art methods with low- and
high-dimensional cases without loss of accuracy.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lu:2018:FDR,
author = "Guan-Ruei Lu and Chun-Hao Kuo and Kuen-Cheng Chiang
and Ansuman Banerjee and Bhargab B. Bhattacharya and
Tsung-Yi Ho and Hung-Ming Chen",
title = "Flexible Droplet Routing in Active Matrix-Based
Digital Microfluidic Biochips",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "37:1--37:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3184388",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The active matrix (AM)-based architecture offers many
advantages over conventional digital
electrowetting-on-dielectric (EWOD) microfluidic
biochips, such as the capability of handling
variable-size droplets, more flexible droplet movement,
and precise control over droplet navigation. However, a
major challenge in choosing the routing paths is to
decide when the droplets are to be reshaped depending
on the congestion of the intended path, or split-and-route
subdroplets, and merging them at their respective
destinations. As the number of microelectrodes in
AM-EWOD chips is large, the path selection problem
becomes further complicated. In this article, we
propose a negotiation-guided flow based on routing of
subdroplets that obviates the explicit need for
deciding when the droplets are to be manipulated, yet
fully utilizing the power of droplet reshaping,
splitting, and merging them to facilitate their
journey. The proposed algorithm reduces routing cost
and provides more freedom in deadlock avoidance in the
presence of multiple routing tasks by assigning certain
congestion penalty for sibling subdroplets and fluidic
penalty for heterogeneous droplets. Compared to
existing techniques, it reduces latest arrival time by
an average of 29\% for several benchmark and random
test suites. Furthermore, our method is observed to
provide 100\% routability of nets for all test cases,
whereas existing and baseline routers fail to produce
feasible solutions in many instances. We also propose a
reliable mode droplet routing strategy where the number
of unreliable splitting operations can be reduced by
paying a small penalty on latest arrival time.",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xie:2018:ADI,
author = "Mimi Xie and Chen Pan and Mengying Zhao and Yongpan
Liu and Chun Jason Xue and Jingtong Hu",
title = "Avoiding Data Inconsistency in Energy Harvesting
Powered Embedded Systems",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "38:1--38:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3182170",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Energy harvesting is becoming a favorable alternative
to power future generation embedded systems, as it is
more environmentally and user friendly. However, energy
harvesting powered embedded systems suffer from
frequent execution interruption due to unstable energy
supply. To tackle this problem, nonvolatile memory has
been deployed to save the whole volatile state for
computation. When power resumes, the processor can
restore the state back to volatile memories and
continue execution. However, without careful
consideration, the process of checkpointing and
resuming could cause inconsistency between volatile and
nonvolatile memories, which leads to irreversible
errors. In this article, we propose a consistency-aware
adaptive checkpointing scheme that ensures correctness
for all checkpoints. The proposed technique efficiently
identifies all possible inconsistency positions in
programs and inserts auxiliary code to ensure
correctness by offline analysis. In addition, adaptive
checkpointing assisted register file profiling and
online tracking techniques further reduce the overhead
of each checkpoint. Evaluation results show that the
proposed checkpointing strategy can successfully
eliminate inconsistency errors and greatly reduce the
checkpointing overhead.",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Arcaro:2018:RTG,
author = "Lu{\'\i}s Fernando Arcaro and Karila {Palma Silva} and
R{\^o}mulo {Silva De Oliveira}",
title = "On the Reliability and Tightness of {GP} and
Exponential Models for Probabilistic {WCET}
Estimation",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "39:1--39:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3185154",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As computer architectures evolve, guaranteeing that
Real-Time Systems' (RTSs') timing requirements are met
through Worst Case Execution Time (WCET) upper bounds
becomes increasingly difficult. Techniques such as
Measurement-Based Probabilistic Timing Analysis (MBPTA)
have emerged that estimate WCET bounds exceeded only
with arbitrarily low probabilities (i.e., pWCETs)
through Extreme Value Theory (EVT). The Peaks Over
Threshold (POT) approach for applying EVT involves
adjusting a tail-shaped distribution, e.g., Generalized
Pareto (GP) or Exponential, to the values that exceed a
carefully selected high threshold. Several works
suggest that GP should be used within POT for best
representing different tail shapes, while others
consider the Exponential model more adequate for
providing upper bounds with increased reliability. This
work presents empirical reliability and tightness
evaluations of the pWCET estimates yielded by the GP
and Exponential models while applying MBPTA through the
POT approach. It mainly provides counter-evidence to
the GP model reliability and evidence of the
Exponential model adequacy in this context.",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jassi:2018:GGB,
author = "Munish Jassi and Yong Hu and Daniel
Mueller-Gritschneder and Ulf Schlichtmann",
title = "Graph-Grammar-Based {IP}-Integration ({GRIP}) --- An
{EDA} Tool for Software-Defined {SoCs}",
journal = j-TODAES,
volume = "23",
number = "3",
pages = "40:1--40:??",
month = apr,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3139381",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In modern system-on-chip (SoC) designs, IP-reuse is
considered a driving force to increase productivity. To
support various designs, a huge amount of Intellectual
Property (IP) hardware blocks have been developed. The
integration of those IPs into an SoC may require
significant effort --- up to days or weeks depending on
experience and complexity. This article presents a
novel approach to significantly reduce the design
effort to bring-up a working SoC design by automatic IP
integration as part of a library-based Software-defined
SoC flow. In detail, the IP-supplier prepares a
HW-accelerated software library (HASL) for the SoC
architect, who wants to use the IP in an SoC design. As
a key point of our approach, integration knowledge is
encoded in the library as a set of integration rules.
These rules are defined in the machine-readable
standardized IP-XACT format by the IP supplier, who has
a good knowledge of the IP's hardware details. The
library preparation step on the IP supplier's side is
also partly automated in the proposed flow, including a
partial generation of configurable HW drivers,
schedulers, and the software library functions. For the
SoC architect, we have developed the
graph-grammar-based IP-integration (GRIP) tool. The
software application is developed using the functions
supplied in the HASL. According to the calls to the
HASL functions, the GRIP tool automatically integrates
IP-blocks using the rule information supplied with the
library and runs a full Design Space Exploration. For
this, the SoC architecture and rules are transformed
into the graph domain to apply graph rewriting methods.
The GRIP tool is model-driven and based on the Eclipse
Modeling Framework. With code generation techniques,
SoC candidate architectures can be transformed to
hardware descriptions for the target platform. The
HW/SW interfaces between SW library functions and IP
blocks can be automatically generated for bare-metal or
Linux-based applications. The approach is demonstrated
with two case-studies on the Xilinx Zynq-based ZedBoard
evaluation board using a HASL for computer vision. It
can yield $ 10 \times $--$ 150 \times $ performance
improvement for the bare-metal application versions and
$ 4 \times $--$ 7 \times $ performance improvement for
the Linux-based application versions, when executed on
an optimized HW-accelerated SoC architecture compared
to a non HW-accelerated SoC. The effort for IP
integration is comparable to using a software library,
hence, providing a significant advantage over a manual
IP integration.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chu:2018:ISS,
author = "Chris Chu and Mustafa Ozdal",
title = "Introduction to the Special Section on Advances in
Physical Design Automation",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "41:1--41:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3199220",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2018:UHP,
author = "Wuxi Li and Yibo Lin and Meng Li and Shounak Dhar and
David Z. Pan",
title = "{UTPlaceF 2.0}: a High-Performance Clock-Aware {FPGA}
Placement Engine",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "42:1--42:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3174849",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Modern field-programmable gate array (FPGA) devices
contain complex clock architectures on top of
configurable logics. Unlike application specific
integrated circuits (ASICs), the physical structure of
clock networks in an FPGA is pre-manufactured and
cannot be adjusted to different applications.
Furthermore, clock routing resources are typically
limited for high-utilization designs. Consequently,
clock architectures impose extra clock constraints and
further complicate physical implementation tasks such
as placement. Traditional ASIC placement techniques
only optimize conventional design metrics such as
wirelength, routability, power, and timing without
clock legality consideration. It is imperative to have
new techniques to honor clock constraints during
placement for FPGAs. In this article, we propose a
high-performance FPGA placement engine, UTPlaceF 2.0,
that optimizes wirelength and routability while
honoring complex clock constraints. Our proposed
approaches consist of an iterative
minimum-cost-flow-based cell assignment as well as a
clock-aware packing for producing clock-legal yet
high-quality placement solutions. UTPlaceF 2.0 won
first place in the ISPD'17 clock-aware FPGA placement
contest organized by Xilinx, outperforming the second-
and the third-place winners by 4.0\% and 10.0\%,
respectively, in routed wirelength with competitive
runtime, on a set of industry benchmarks.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Darav:2018:ELH,
author = "Nima Karimpour Darav and Ismail S. Bustany and Andrew
Kennings and David Westwick and Laleh Behjat",
title = "{Eh?Legalizer}: a High Performance Standard-Cell
Legalizer Observing Technology Constraints",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "43:1--43:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3158215",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The legalization step is performed after global
placement where wire length and routability are
optimized or during timing optimization where buffer
insertion or gate sizing are applied to meet timing
requirements. Therefore, an ideal legalization approach
must preserve the quality of the input placement in
terms of routability, wire length, and timing
constraints. These requirements indirectly impose
maximum and average cell movement constraints during
legalization. In addition, the legalization step should
effectively manage white space availability with a
highly efficient runtime in order to be used in an
iterative process such as timing optimization. In this
article, a robust and fast legalization method called
Eh?Legalizer for standard-cell placement is presented.
Eh?Legalizer legalizes input placements while
minimizing the maximum and average cell movements using
a highly efficient novel network flow-based approach.
In contrast to the traditional network flow-based
legalizers, areas with high cell utilizations are
effectively legalized by finding several candidate
paths and there is no need for a post-process step. The
experimental results conducted on several benchmarks
show that Eh?Legalizer results in 2.5 times and 3.3
times less the maximum and average cell movement,
respectively, while its runtime is significantly ($ 18
\times $) lower compared to traditional legalizers. In
addition, the experimental results illustrate the
scalability and robustness of Eh?Legalizer with respect
to the floorplan complexity. Finally, the
detailed-routing results show detailed-routing
violations are reduced on average by 23\% when
Eh?Legalizer is used to generate legal solutions.",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Wang:2018:VAG,
author = "Chen Wang and Yanan Sun and Shiyan Hu and Li Jiang and
Weikang Qian",
title = "Variation-Aware Global Placement for Improving
Timing-Yield of Carbon-Nanotube Field Effect Transistor
Circuit",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "44:1--44:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3175500",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As the conventional silicon-based CMOS technology
marches toward the sub-10nm region, the problem of high
power density becomes increasingly serious. Under this
circumstance, the carbon-nanotube field effect
transistors (CNFETs) emerge as a promising alternative
to the conventional silicon-based CMOS devices.
However, they experience a much larger variation than
the silicon-based CMOS devices, which results in a
large circuit delay variation and hence, a significant
timing yield loss. One of the main variation sources is
the carbon-nanotube (CNT) density variation. However,
it shows a special property not existing for
silicon-based CMOS devices, namely the asymmetric
spatial correlation. In this work, we propose novel
global placement algorithms to reduce the timing yield
loss caused by the CNT density variation. To
effectively reduce the statistical circuit delay, we
first develop a statistical delay measure for a segment
of gates. Based on this measure, we further develop a
segment-based strategy and a path-based placement
strategy to reduce the delays of the statistically
critical paths. Experimental results demonstrated that
both of our approaches effectively improve the timing
yield.",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2018:MRB,
author = "Kuen-Wey Lin and Yeh-Sheng Lin and Yih-Lang Li and
Rung-Bin Lin",
title = "A Maze Routing-Based Methodology With Bounded
Exploration and Path-Assessed Retracing for Constrained
Multilayer Obstacle-Avoiding Rectilinear {Steiner} Tree
Construction",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "45:1--45:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3177878",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Owing to existing intellectual properties, prerouted
nets, and power/ground wires, the routing of a system
on chip design demands to detour around multilayer
obstacles. Traditional approaches for the multilayer
obstacle-avoiding rectilinear Steiner tree (ML-OARST)
problem are thus nonmaze routing-based approaches for
runtime issues, yet they cannot be directly applied to
deal with additional constraints such as variant edge
weights on a routing layer. In this article, we propose
the maze routing-based methodology with bounded
exploration and path-assessed retracing to reduce
runtime and routing cost for the constrained ML-OARST
construction problem. The exploration of maze routing
is bounded to reduce the runtime; the costs of
connecting pins are computed to select Steiner points
in the retracing phase. To further reduce the routing
cost, we develop a Steiner point-based ripping-up and
rebuilding scheme for altering tree topology.
Experimental results on industrial and randomly
generated benchmarks demonstrate that the proposed
methodology can provide a solution with good quality in
terms of routing cost and has a significant speedup
compared to traditional maze routing. A commercial tool
is also used to show the effectiveness of the proposed
methodology.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jiao:2018:OER,
author = "Fengxian Jiao and Sheqin Dong",
title = "Ordered Escape Routing with Consideration of
Differential Pair and Blockage",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "46:1--46:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3185783",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Ordered escape routing is a critical issue in
high-speed PCB routing. Differential pair and
thermal-blockage-avoided are useful in PCB design to
obtain high noise immunity and low electromagnetic
interference. In this article, a Min-cost
Multi-commodity Flow (MMCF) approach is proposed to
solve the ordered escape routing. First, the
characteristic of grid pin array and staggered pin
array is analyzed and then a basic network model is
used to convert ordered escape routing to MMCF model.
To satisfy the constraints of ordered escape routing,
three novel transformations, such as non-crossing
transformation, ordering transformation, and capacity
transformation, are used to convert the basic network
model to the final correct MMCF model. After that, the
differential pair in ordered escape routing is
discussed. Finally, a method to deal with the blockage
issue is proposed. Experimental results show that our
method achieves 100\% routability for all the test
cases. The method can get both a feasible solution and
an optimal solution for ordered escape routing.
Compared to published approaches, our method improves
in both wire length and CPU time remarkably. At the
same time, the proposed method can effectively avoid
the blockage and deal with the differential pair.",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Liu:2018:RML,
author = "Bo Liu and Gong Chen and Bo Yang and Shigetoshi
Nakatake",
title = "Routable and Matched Layout Styles for Analog Module
Generation",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "47:1--47:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3182169",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Two novel automatic generation methods for analog
layout --- a symmetrical twin-row method for MOS
transistors and a twisted common-centroid method for
capacitor arrays --- are introduced. Based on the proposed
layout styles and the corresponding algorithms, the
symmetry and common-centroid placement patterns for
analog devices are realized to guarantee matching
properties. On this basis, as the most prominent
contribution of this article, channel routing-based
algorithms for the proposed layout styles are presented
and could achieve 100\% routability due to
well-arranged devices and corresponding low routing
complexity. The algorithms benefits include a small
layout area that maximizes the diffusion-sharing of MOS
transistors and less routing layer usage for
common-centroid device arrays. Moreover, we
successfully applied our algorithms to the layout
designs of two typical analog modules including a
two-stage operating amplifier and a Successive
Approximation Register Analog-to-Digital Converter
(SAR-ADC). The generated layouts and the circuit
simulation results demonstrate the effectiveness of our
algorithms in terms of their routability and matching
properties. Our algorithms can also be extended to
apply to a variety of essential MOS analog circuits.",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2018:ICA,
author = "Pei-Yu Lee and Iris Hui-Ru Jiang",
title = "{iTimerM}: a Compact and Accurate Timing Macro Model
for Efficient Hierarchical Timing Analysis",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "48:1--48:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3149818",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "As designs continue to grow in size and complexity,
EDA paradigm shifts from flat to hierarchical timing
analysis. In this article, we present compact and
accurate timing macro modeling, which is the key to
efficient and accurate hierarchical timing analysis.
Our goal is to contain only a minimal amount of
interface logic in our timing macro model. The main
idea is to separate the interface logic into variant
and constant timing regions. Then, the variant timing
region is reserved for accuracy, while the constant
timing region is reduced for compactness. For reducing
the constant timing region, we propose anchor pin
insertion and deletion by generalizing existing timing
graph reduction techniques. Furthermore, we devise a
lookup table index selection technique to achieve high
model accuracy over the possible operating condition
range. Compared with two common models used in
industry, extracted timing model and interface logic
model, our model has high model accuracy and small
model size. Based on the TAU 2016 and 2017 timing macro
modeling contest benchmark suites, our results show
that our algorithm delivers superior efficiency and
accuracy: Hierarchical timing analysis using our model
can significantly reduce runtime and memory compared
with flat timing analysis on the original design.
Moreover, our algorithm outperforms TAU 2016 and 2017
contest winners in model accuracy, model size, model
generation performance, and model usage performance.",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sadat:2018:OAL,
author = "Sayed Abdullah Sadat and Mustafa Canbolat and
Sel{\c{c}}uk K{\"o}se",
title = "Optimal Allocation of {LDOs} and Decoupling Capacitors
within a Distributed On-Chip Power Grid",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "49:1--49:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3177877",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Parallel on-chip voltage regulation, where multiple
regulators are connected to the same power grid, has
recently attracted significant attention with the
proliferation of small on-chip voltage regulators. In
this article, the number, size, and location of
parallel low-dropout (LDO) regulators and intentional
decoupling capacitors are optimized using mixed integer
non-linear programming formulation. The proposed
optimization function concurrently considers multiple
objectives such as area, power noise, and overall power
consumption. Certain objectives are optimized by
putting constraints on the other objectives with the
proposed technique. Additional constraints have been
added to avoid the overlap of LDOs and decoupling
capacitors in the optimization process. The results of
an optimized LDO allocation in the POWER8 chip is
compared with the recent LDO allocation in the same IBM
chip in a case study where a 20\% reduction in the
noise is achieved. The results of the proposed
multi-criteria objective function under a different
area, power, and noise constraints are also evaluated
with a sample ISPD'11 benchmark circuits in another
case study.",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Cakir:2018:RED,
author = "Burcin Cakir and Sharad Malik",
title = "Reverse Engineering Digital {ICs} through Geometric
Embedding of Circuit Graphs",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "50:1--50:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3193121",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Outsourcing of design and manufacturing processes
makes integrated circuits (ICs) vulnerable to
adversarial changes and raises concerns about their
integrity. Reverse engineering the manufactured netlist
helps identify malicious insertions. In this article,
we present an automated approach that, given a
reference design description with high-level blocks,
infers these blocks in an untrusted gate-level (test)
implementation. Using the graph connectivity of the
netlists, we compute a geometric embedding for each
wire in the circuits, which, then, is used to compute a
bipartite matching between the nodes of the two designs
and identify high-level blocks in the test circuit.
Experiments to evaluate the efficacy of the proposed
technique on various-sized designs, including the
multi-core processor OpenSparc T1, show that it can
correctly match over 90\% of gates in the test circuit
to their corresponding block in the reference model.",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Ittershagen:2018:IFM,
author = "Philipp Ittershagen and Kim Gr{\"u}ttner and Wolfgang
Nebel",
title = "An Integration Flow for Mixed-Critical Embedded
Systems on a Flexible Time-Triggered Platform",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "51:1--51:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3190837",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The rise of mixed-critical embedded systems imposes
novel challenges on the specification, development, and
functional validation in a design flow. In the emerging
dynamic scheduling context of mixed-criticality
platforms, the system behaviour needs to be estimated
in an early step in the design flow to assess the
integration impact, especially for quality of
service-driven, low-critical subsystems. We provide a
modelling and integration flow for specifying,
estimating, and evaluating software functions, ranging
from an initial executable specification to an
implementation candidate on an MPSoC. Based on a
data-driven model to evaluate dynamic resource
consumption effects of high-critical subsystems and the
scheduling overhead, we propose a systematic method for
constructing workload models of high-critical software
components on the target. Our proxies provide an
integration environment for low-critical functions by
mimicking the high-critical temporal behaviour on the
target. By integrating a low-critical video encoding
subsystem with a benchmark suite as the high-critical
subsystem we show that the performance model allows for
evaluating end-to-end execution times in the
low-critical function with an average error of 0.37\%
and the application proxy only introduces a maximum
error of 1.14\% in a performance evaluation.",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2018:ESA,
author = "Yung-Chih Chen",
title = "Enhancements to {SAT} Attack: Speedup and Breaking
Cyclic Logic Encryption",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "52:1--52:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3190853",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Logic encryption is an IC protection technique for
preventing an IC design from overproduction and
unauthorized use. It hides a design's functionality by
inserting key gates and key inputs, such that a secret
key is required to activate the design and make it
function correctly. The security of a logic encryption
algorithm is evaluated according to the difficulty of
cracking the secret key. The state-of-the-art attack
method identifies a secret key with a series of
SAT-solving calls to prune all the incorrect keys.
Although it can break most of the existing logic
encryption algorithms within a few hours, we observe
that there exist two enhancements for increasing its
efficiency. First, we introduce a preprocess to
identify and eliminate redundant key inputs and
simplify SAT problems. Second, we present a key
checking process for increasing the pruned incorrect
keys in each SAT-solving iteration. We conducted the
experiments on a set of benchmark circuits encrypted by
six different logic encryption algorithms. The
simulation results show that the enhanced method can
successfully unlock 10 benchmark circuits which
originally could not be cracked within 1 hour. For all
the benchmark circuits, the average speedup is
approximately $ 2.2 \times $ in terms of simulation time.
Furthermore, a recent logic encryption method locks a
design by creating cyclic paths, which can invalidate
the SAT-based attack method. We analyze the impact of
cyclic paths and propose an enhancement to break the
cyclic logic encryption method.",
acknowledgement = ack-nhfb,
articleno = "52",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2018:PIP,
author = "Irith Pomeranz",
title = "Partially Invariant Patterns for {LFSR}-Based
Generation of Close-to-Functional Broadside Tests",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "53:1--53:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3201405",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Close-to-functional scan-based tests are expected to
create close-to-functional operation conditions in
order to avoid overtesting of delay faults. Existing
metrics for the proximity to functional operation
conditions are based on the scan-in state. For example,
they consider the distance between the scan-in state
and a reachable state (a state that the circuit can
visit during functional operation). However, the
deviation from functional operation conditions can
increase during a test beyond the deviation that is
measured by the scan-in state. To ensure that the
deviation does not increase, this article introduces
the concept of a partially invariant pattern. The
article describes a procedure for extracting partially
invariant patterns from functional broadside tests
whose scan-in states are reachable states. Being
partially specified, partially invariant patterns are
suitable for test data compression. The article studies
the use of partially invariant patterns for
linear-feedback shift-register (LFSR) based test data
compression. Noting that a seed may not exist for a
given partially invariant pattern with a given LFSR,
the procedure described in this article uses an
iterative process that not only matches a seed to a
partially invariant pattern, but also adjusts the
partially invariant pattern based on the test that the
seed produces. The article also addresses the selection
of LFSRs for the generation of close-to-functional
broadside tests based on partially invariant patterns.
Experimental results are presented to demonstrate the
feasibility of the procedure.",
acknowledgement = ack-nhfb,
articleno = "53",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zhao:2018:TSB,
author = "Hengyang Zhao and Qi Hua and Hai-Bao Chen and Yaoyao
Ye and Hai Wang and Sheldon X.-D. Tan and Esteban
Tlelo-Cuautle",
title = "Thermal-Sensor-Based Occupancy Detection for Smart
Buildings Using Machine-Learning Methods",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "54:1--54:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3200904",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "In this article, we propose a novel approach to detect
the occupancy behavior of a building through the
temperature and/or possible heat source information.
The new method can be used for energy reduction and
security monitoring for emerging smart buildings. Our
work is based on a building simulation program,
EnergyPlus, from the Department of Energy. EnergyPlus
can model various time-series inputs to a building such
as ambient temperature; heating, ventilation, and
air-conditioning (HVAC) inputs; power consumption of
electronic equipment; lighting; and number of occupants
in a room, sampled each hour, and produce resulting
temperature traces of zones (rooms). Two
machine-learning-based approaches for detecting human
occupancy of a smart building are applied herein,
namely support vector regression (SVR) and recurrent
neural network (RNN). Experimental results with SVR
show that the four-feature model provides accurate
detection rates, giving a 0.638 average error and
5.32\% error rate, and the five-feature model delivers
a 0.317 average error and 2.64\% error rate. This
indicates that SVR is a viable option for occupancy
detection. In the RNN method, Elman's RNN can estimate
occupancy information of each room of a building with
high accuracy. It has local feedback in each layer and,
for a five-zone building, it is very accurate for
occupancy behavior estimation. The error level, in
terms of number of people, can be as low as 0.0056 on
average and 0.288 at maximum, considering ambient, room
temperatures, and HVAC powers as detectable
information. Without knowing HVAC powers, the
estimation error can still be 0.044 on average, and
only 0.71\% estimated points have errors greater than
0.5. Our article further shows that both methods
deliver similar accuracy in the occupancy detection.
But the SVR model is more stable for adding or removing
features of the system, while the RNN method can
deliver more accuracy when the features used in the
model do not change a lot.",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Shalu:2018:DDS,
author = "Shalu and Srijan Kumar and Ananya Singla and Sudip Roy
and Krishnendu Chakrabarty and Partha P. Chakrabarti
and Bhargab B. Bhattacharya",
title = "Demand-Driven Single- and Multitarget Mixture
Preparation Using Digital Microfluidic Biochips",
journal = j-TODAES,
volume = "23",
number = "4",
pages = "55:1--55:??",
month = jul,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3200903",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:39 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Recent studies in algorithmic microfluidics have led
to the development of several techniques for automated
solution preparation using droplet-based digital
microfluidic (DMF) biochips. A major challenge in this
direction is to produce a mixture of several reactants
with a desired ratio while optimizing reactant cost and
preparation time. The sequence of mix-split operations
that are to be performed on the droplets is usually
represented as a mixing tree (or graph). In this
article, we present an efficient mixing algorithm,
namely, Mixing Tree with Common Subtrees (MTCS), for
preparing single-target mixtures. MTCS attempts to best
utilize intermediate droplets, which were otherwise
wasted, and uses morphing based on permutation of leaf
nodes to further reduce the graph size. The technique
can be generalized to produce multitarget ratios, and
we present another algorithm, namely, Multiple Target
Ratios (MTR). Additionally, in order to enhance the
output load, we also propose an algorithm for droplet
streaming called Multitarget Multidemand (MTMD).
Simulation results on a large set of target ratios show
that MTCS can reduce the mean values of the total
number of mix-split steps ($ T_{ms} $) and waste
droplets ($W$) by 16\% and 29\% over Min-Mix (Thies et
al. 2008) and by 22\% and 34\% over RMA (Roy et al.
2015), respectively. Experimental results also suggest
that MTR can reduce the average values of $ T_{ms} $
and $W$ by 23\% and 44\% over the repeated version of
Min-Mix, by 30\% and 49\% over the repeated version of
RMA, and by 9\% and 22\% over the repeated version of
MTCS, respectively. It is observed that MTMD can reduce
the mean values of $ T_{ms} $ and $W$ by 64\% and 85\%,
respectively, over MTR. Thus, the proposed multitarget
techniques MTR and MTMD provide efficient solutions to
multidemand, multitarget mixture preparation on a DMF
platform.",
acknowledgement = ack-nhfb,
articleno = "55",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2018:DML,
author = "Hantao Huang and Hang Xu and Yuehua Cai and Rai
Suleman Khalid and Hao Yu",
title = "Distributed Machine Learning on Smart-Gateway Network
toward Real-Time Smart-Grid Energy Management with
Behavior Cognition",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "56:1--56:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3209888",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Real-time data analytics for smart-grid energy
management is challenging with consideration of both
occupant behavior profiles and energy profiles. This
article proposes a distributed and networked
machine-learning platform on smart-gateway-based
smart-grid in residential buildings. It can analyze
occupant behaviors, provide short-term load
forecasting, and allocate renewable energy resources.
First, occupant behavior profile is captured by
real-time indoor positioning system with WiFi data
analytics; and the energy profile is extracted by
real-time meter system with electricity load data
analytics. Then, the 24-hour occupant behavior profile
and energy profile are fused with prediction using an
online distributed machine-learning algorithm with
real-time data update. Based on the forecasted occupant
behavior profile and energy profile, solar energy
source is allocated to reduce peak demand on the main
electricity power-grid. The whole management flow can
be operated on the distributed smart-gateway network
with limited computational resources but with a
supported general machine-learning engine. Experimental
results on occupant behavior extraction show that the
proposed algorithm can achieve 91.2\% positioning
accuracy within 3.64 m. Moreover, $ 50 \times $ and $ 38
\times $ speed-up is obtained during data testing and
training, respectively, when compared to traditional
support vector machine (SVM) method. For short-term
load forecasting, it is 14.83\% more accurate when
compared to SVM-based data analytics. Based on the
predicted occupant behavior profile and energy profile,
our proposed energy management system can achieve
19.66\% more peak load reduction and 26.41\% more cost
saving as compared to the SVM-based method.",
acknowledgement = ack-nhfb,
articleno = "56",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zoni:2018:CSC,
author = "Davide Zoni and Alessandro Barenghi and Gerardo Pelosi
and William Fornaciari",
title = "A Comprehensive Side-Channel Information Leakage
Analysis of an In-Order {RISC CPU} Microarchitecture",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "57:1--57:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3212719",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Side-channel attacks are a prominent threat to the
security of embedded systems. To perform them, an
adversary evaluates the goodness of fit of a set of
key-dependent power consumption models to a collection
of side-channel measurements taken from an actual
device, identifying the secret key value as the one
yielding the best-fitting model. In this work, we
analyze for the first time the microarchitectural
components of a 32-bit in-order RISC CPU, showing which
one of them is accountable for unexpected side-channel
information leakage. We classify the leakage sources,
identifying the data serialization points in the
microarchitecture and providing a set of hints that can
be fruitfully exploited to generate implementations
resistant against side-channel attacks, either writing
or generating proper assembly code.",
acknowledgement = ack-nhfb,
articleno = "57",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Seo:2018:NIS,
author = "Minjun Seo and Roman Lysecky",
title = "Non-Intrusive In-Situ Requirements Monitoring of
Embedded System",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "58:1--58:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3206213",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Accounting for all operating conditions of a system at
the design stage is typically infeasible for complex
systems. Monitoring and verifying system requirements
at runtime enable a system to continuously and
introspectively ensure the system is operating
correctly in the presence of dynamic execution
scenarios. In this article, we present a
requirements-driven methodology enabling efficient
runtime monitoring of embedded systems. The proposed
approach extracts a runtime monitoring graph from
system requirements specified using UML sequence
diagrams. Non-intrusive, on-chip hardware dynamically
monitors the system execution, verifies the execution
adheres to the requirements model, and in the event of
a failure provides detailed information that can be
analyzed to determine the root cause. Using case
studies of an autonomous vehicle and pacemaker
prototypes, we analyze the relationship between event
coverage, detection rate, and hardware requirements.",
acknowledgement = ack-nhfb,
articleno = "58",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2018:DDP,
author = "Irith Pomeranz",
title = "Dynamically Determined Preferred Values and a
Design-for-Testability Approach for Multiplexer Select
Inputs under Functional Test Sequences",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "59:1--59:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3219778",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Earlier works observed that certain primary inputs
have preferred values, which help increase the
gate-level fault coverage when they appear in a
functional test sequence. This article observes that
multiplexers present additional opportunities for
increasing the fault coverage of a functional test
sequence, which are not captured by preferred primary
input values. Because multiplexers are prevalent, their
effect on the fault coverage can be significant. A
static analysis that is independent of any functional
test sequence is performed in this article to identify
preferred values for the outputs of multiplexers. This
is followed by a dynamic analysis that adjusts the
select inputs of the multiplexers for a given
functional test sequence to ensure that the preferred
values appear on the outputs of the multiplexers more
often. The analysis yields design-for-testability logic
for the select inputs of the multiplexers that have
preferred values. The logic is independent of the
functional test sequence, and it allows the fault
coverage to be increased when the select inputs are not
primary inputs, or when the same select inputs are used
for different multiplexers. Experimental results are
presented to demonstrate that this approach has a
significant effect on the fault coverage of functional
test sequences.",
acknowledgement = ack-nhfb,
articleno = "59",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lee:2018:PTT,
author = "Dongjin Lee and Sourav Das and Janardhan Rao Doppa and
Partha Pratim Pande and Krishnendu Chakrabarty",
title = "Performance and Thermal Tradeoffs for Energy-Efficient
Monolithic {$3$D} Network-on-Chip",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "60:1--60:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3223046",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Three-dimensional (3D) integration enables the design
of high-performance and energy-efficient network on
chip (NoC) architectures as communication backbones for
manycore chips. To exploit the benefits of the vertical
dimension of 3D integration, through-silicon-via (TSV)
has been predominantly used in state-of-the-art
manycore chip design. However, for TSV-based systems,
high power density and the resultant thermal hotspot
remain major concerns from the perspectives of chip
functionality and overall reliability. The power
consumption and thermal profiles of 3D NoCs can be
improved by incorporating a Voltage-Frequency-Island
(VFI)-based power management strategy. However, due to
inherent thermal constraints of a TSV-based 3D system,
we are unable to fully exploit the benefits offered by
the power management methodology. In this context,
emergence of monolithic 3D (M3D) integration has opened
up new possibility of designing ultra-low-power and
high-performance circuits and systems. The smaller
dimensions of the inter-layer dielectric (ILD) and
monolithic inter-tier vias (MIVs) offer high-density
integration, flexibility of partitioning logic blocks
across multiple tiers, and significant reduction of
total wire-length. In this work, we present the
first-ever study of the performance-thermal tradeoffs
for energy efficient monolithic 3D manycore chips. In
particular, we present a comparative performance
evaluation of M3D NoCs with respect to their
conventional TSV-based counterparts. We demonstrate
that the proposed M3D-based NoC architecture
incorporating VFI-based power management achieves a
maximum of 29.4\% lower energy-delay-product (EDP)
compared to the TSV-based designs for a large set of
benchmarks. We also demonstrate that the M3D-based NoC
shows up to 29.1\% lower maximum temperature than the
TSV-based counterpart for these benchmarks.",
acknowledgement = ack-nhfb,
articleno = "60",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Han:2018:FCS,
author = "Inhak Han and Youngsoo Shin",
title = "Folded Circuit Synthesis: Min-Area Logic Synthesis
Using Dual-Edge-Triggered Flip-Flops",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "61:1--61:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3229082",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The area required by combinational logic of a
sequential circuit based on standard flip-flops can be
reduced by identifying subcircuits that are identical.
Pairs of matching subcircuits can then be replaced by
circuits in which dual-edge-triggered flip-flops
operate on multiplexed data at the rising and falling
edges of the clock signal. We show how to modify the
Boolean network describing a combinational logic to
increase the opportunities for folding, without
affecting its function. Experiments with benchmark
circuits achieved an average reduction in circuit area
of 18\%.",
acknowledgement = ack-nhfb,
articleno = "61",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Elmandouh:2018:GFV,
author = "Eman M. Elmandouh and Amr G. Wassal",
title = "Guiding Formal Verification Orchestration Using
Machine Learning Methods",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "62:1--62:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3224206",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Typical modern HW designs include many blocks
associated with thousands of design properties. Having
today's commercial formal verifiers utilize a
complementary set of state-of-art formal algorithms is
a key in enabling the formal verification tools to
successfully cope with verification problems of
different sizes, types, and complexities. Formal
engines orchestration is the methodology used to pick
the most appropriate formal engine for a specific
verification problem. It assures proper scheduling of
the formal engines to minimize the time consumed to
solve individual design verification problems, hence
highly impacts the time required to verify the overall
design properties. This work proposes the utilization
of supervised machine learning classification
techniques to guide the orchestration step by
predicting the formal engines that should be assigned
to a design property. Up to 16,500 formal verification
runs on RTL designs and their properties are used to
train the classifier to create a prediction model. The
classifier assigns any new verification problem to an
appropriate list of formal engines associated with a
probability distribution over the set of engines
classes. Our results indicate how the proposed model is
able to improve the formal suite total run-time by up
to 59\% of its maximum allowable time improvement using
multi-classification-based orchestration and to
nominate with 88\% accuracy the appropriate formal
engines for new-to-verify HW designs.",
acknowledgement = ack-nhfb,
articleno = "62",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{K:2018:AAF,
author = "Keerthi K and Chester Rebeiro and Aritra Hazra",
title = "An Algorithmic Approach to Formally Verify an {ECC}
Library",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "63:1--63:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3224205",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "The weakest link in cryptosystems is quite often due
to the implementation rather than the mathematical
underpinnings. A vast majority of attacks in the recent
past have targeted programming flaws and bugs to break
security systems. Due to the complexity, empirically
verifying such systems is practically impossible, while
manual verification as well as testing do not provide
adequate guarantees. In this article, we leverage model
checking techniques to prove the functional correctness
of an elliptic curve cryptography (ECC) library with
respect to its formal specification. We demonstrate how
the huge state space of the C library can be aptly
verified using a hierarchical assume-guarantee
verification strategy. To test the scalability of this
approach, we verify the correctness of five
NIST-specified elliptic curve implementations. We also
verify the newer curve25519 elliptic curve, which is
finding multiple applications, due to its higher
security and simpler implementation. The 192-bit NIST
elliptic curve took 1 day to verify. This was the
smallest curve we verified. The largest curve with a
521-bit prime field took 26 days to verify. Curve25519
took 1.5 days to verify.",
acknowledgement = ack-nhfb,
articleno = "63",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2018:EFM,
author = "Tseng-Yi Chen and Yuan-Hao Chang and Yuan-Hung Kuan
and Ming-Chang Yang and Yu-Ming Chang and Pi-Cheng
Hsiu",
title = "Enhancing Flash Memory Reliability by Jointly
Considering Write-back Pattern and Block Endurance",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "64:1--64:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3229192",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Owing to high cell density caused by the advanced
manufacturing process, the reliability of flash drives
turns out to be rather challenging in flash system
designs. To enhance the reliability of flash drives,
error-correcting code (ECC) has been widely utilized in
flash drives to correct error bits during
programming/reading data to/from flash drives. Although
ECC can effectively enhance the reliability of flash
drives by correcting error bits, the capability of ECC
would degrade while the program/erase (P/E) cycles of
flash blocks is increased. Finally, ECC could not
correct a flash page, because a flash page contains too
many error bits. As a result, reducing error bits is an
effective solution to further improve the reliability
of flash drives when a specific ECC is adopted in the
flash drive. This work focuses on how to reduce the
probability of producing error bits in a flash page.
Thus, we propose a pattern-aware write strategy for
flash reliability enhancement. The proposed write
strategy considers both the P/E cycle of blocks and the
pattern of written data while a flash block is
allocated to store the written data. Since the proposed
write strategy allocates young blocks (respectively,
old blocks) for hot data (respectively, cold data) and
flips the bit pattern of the written data to the
appropriate bit pattern, the proposed strategy can
effectively improve the reliability of flash drives.
The experimental results show that the proposed
strategy can reduce the number of error pages by up to
50\%, compared with the well-known DFTL solution.
Moreover, the proposed strategy is orthogonal with all
ECC mechanisms so that the reliability of the flash
drives with ECC mechanisms can be further improved by
the proposed strategy.",
acknowledgement = ack-nhfb,
articleno = "64",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xie:2018:TER,
author = "Guoqi Xie and Zhetao Li and Na Yuan and Renfa Li and
Keqin Li",
title = "Toward Effective Reliability Requirement Assurance for
Automotive Functional Safety",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "65:1--65:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3230620",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Automotive functional safety requirement includes
response time and reliability requirements learning
from the functional safety standard ISO 26262. These
two requirements must be simultaneously satisfied to
assure automotive functional safety requirement.
However, increasing reliability increases the response
time intuitively. This study proposes a method to find
the solution with the minimum response time while
assuring reliability requirement. Pre-assigning
reliability values to unassigned tasks by transferring
the reliability requirement of the function to each
task is a useful reliability requirement assurance
approach proposed in recent years. However, the
pre-assigned reliability values in state-of-the-art
studies have unbalanced distribution of the reliability
of all tasks, thereby resulting in a limited reduction
in response time. This study presents the geometric
mean-based non-fault-tolerant reliability
pre-assignment (GMNRP) and geometric mean-based
fault-tolerant reliability pre-assignment (GMFRP)
approaches, in which geometric mean-based reliability
values are pre-assigned to unassigned tasks. Geometric
mean can make the pre-assigned reliability values of
unassigned tasks to the central tendency, such that it
can distribute the reliability requirements in a more
balanced way. Experimental results show that GMNRP and
GMFRP can effectively reduce the response time compared
with their individual state-of-the-art counterparts.",
acknowledgement = ack-nhfb,
articleno = "65",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Abuowaimer:2018:GRD,
author = "Ziad Abuowaimer and Dani Maarouf and Timothy Martin
and Jeremy Foxcroft and Gary Gr{\'e}wal and Shawki
Areibi and Anthony Vannelli",
title = "{GPlace3.0}: Routability-Driven Analytic Placer for
{UltraScale FPGA} Architectures",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "66:1--66:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3233244",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Optimizing for routability during FPGA placement is
becoming increasingly important, as failure to spread
and resolve congestion hotspots throughout the chip,
especially in the case of large designs, may result in
placements that either cannot be routed or that require
the router to work excessively hard to obtain success.
In this article, we introduce a new, analytic
routability-aware placement algorithm for Xilinx
UltraScale FPGA architectures. The proposed algorithm,
called GPlace3.0, seeks to optimize both wirelength and
routability. Our work contains several unique features
including a novel window-based procedure for satisfying
legality constraints in lieu of packing, an accurate
congestion estimation method based on modifications to
the pathfinder global router, and a novel detailed
placement algorithm that optimizes both wirelength and
external pin count. Experimental results show that
compared to the top three winners at the recent ISPD'16
FPGA placement contest, GPlace3.0 is able to achieve
(on average) a 7.53\%, 15.15\%, and 33.50\% reduction
in routed wirelength, respectively, while requiring
less overall runtime. As well, an additional 360
benchmarks were provided directly from Xilinx Inc.
These benchmarks were used to compare GPlace3.0 to the
most recently improved versions of the first- and
second-place contest winners. Subsequent experimental
results show that GPlace3.0 is able to outperform the
improved placers in a variety of areas including number
of best solutions found, fewest number of benchmarks
that cannot be routed, runtime required to perform
placement, and runtime required to perform routing.",
acknowledgement = ack-nhfb,
articleno = "66",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Fallahzadeh:2018:TPC,
author = "Ramin Fallahzadeh and Hassan Ghasemzadeh",
title = "Trading Off Power Consumption and Prediction
Performance in Wearable Motion Sensors: an Optimal and
Real-Time Approach",
journal = j-TODAES,
volume = "23",
number = "5",
pages = "67:1--67:??",
month = oct,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3198457",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "Power consumption is identified as one of the main
complications in designing practical wearable systems,
mainly due to their stringent resource limitations.
When designing wearable technologies, several
system-level design choices, which directly contribute
to the energy consumption of these systems, must be
considered. In this article, we propose a
computationally lightweight system optimization
framework that trades off power consumption and
performance in connected wearable motion sensors. While
existing approaches exclusively focus on one or a few
hand-picked design variables, our framework
holistically finds the optimal power-performance
solution with respect to the specified application
need. Our design tackles a multi-variant non-convex
optimization problem that is theoretically hard to
solve. To decrease the complexity, we propose a
smoothing function that reduces this optimization to a
convex problem. The reduced optimization is then solved
in linear time using a devised derivative-free
optimization approach, namely cyclic coordinate search.
We evaluate our framework against several holistic
optimization baselines using a real-world wearable
activity recognition dataset. We minimize the energy
consumption for various activity-recognition
performance thresholds ranging from 40\% to 80\% and
demonstrate up to 64\% energy savings.",
acknowledgement = ack-nhfb,
articleno = "67",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Daboul:2018:AAT,
author = "Siad Daboul and Stephan Held and Jens Vygen and Sonja
Wittke",
title = "An Approximation Algorithm for Threshold Voltage
Optimization",
journal = j-TODAES,
volume = "23",
number = "6",
pages = "68:1--68:??",
month = dec,
year = "2018",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3232538",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:40 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
abstract = "We present a primal-dual approximation algorithm for
minimizing the leakage power of an integrated circuit
by assigning gate threshold voltages. While most
existing techniques do not provide a performance
guarantee, we prove an upper bound on the power
consumption. The algorithm is practical and works with
an industrial sign-off timer. It can be used for
post-routing power reduction or for optimizing leakage
power throughout the design flow. We demonstrate the
practical performance on recent microprocessor units.
Our implementation obtains significant leakage power
reductions of up to 8\% on top of one of the most
successful algorithms for gate sizing and threshold
voltage optimization. After timing-aware global
routing, we achieve leakage power reductions of up to
34\%.",
acknowledgement = ack-nhfb,
articleno = "68",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Delledonne:2018:CDA,
  author          = "Lorenzo Delledonne and Vittorio Zaccaria and Ruggero
                     Susella and Guido Bertoni and Filippo Melzani",
  title           = "{CASCA}: a Design Automation Approach for Designing
                     Hardware Countermeasures Against Side-Channel Attacks",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "69:1--69:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3241047",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "Implementing a cryptographic circuit poses challenges
                     not always acknowledged in the backing mathematical
                     theory. One of them is the vulnerability against
                     side-channel attacks. A side-channel attack is a
                     procedure that uses information leaked by the circuit
                     through, for example, its own power consumption or
                     electromagnetic emissions, to derive sensitive data
                     (e.g., the secret key used for encryption). Nowadays, we
                     design circuitry to keep this sensitive information
                     from leaking (i.e., a countermeasure), but the path
                     from specification down to implementation is far from
                     being fully automatic. As we know, manual refinement
                     steps can be error prone and the sheer potential of
                     these errors can be devastating in a scenario such as
                     the one we are dealing with. In this article, we
                     investigate whether a single embedded domain specific
                     language (EDSL) can, at the same time, help us in
                     specifying and enforcing the functionality of the
                     circuit as well as its protection against side-channel
                     attacks. The EDSL is a fundamental block of an original
                     design flow (named Countermeasure Against Side-Channel
                     Attacks, i.e., CASCA) whose aim is to complement an
                     existing industrial scenario and to provide the
                     necessary guarantee that a secure primitive is not
                     vulnerable up to a first-order attack. As a practical
                     case study, we will show how we applied the proposed
                     tools to ensure both functional and extra-functional
                     correctness of a composite-field Advanced Encryption
                     Standard (AES) S-Box. To ensure the reproducibility of
                     this research, this article is accompanied by an open
                     source release of the EDSL$^1$ that contains the
                     presented S-Box implementation and an additional
                     3-Shares threshold implementation of the Keccak $ \chi
                     $ function [7].",
  acknowledgement = ack-nhfb,
  articleno       = "69",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chang:2018:DMU,
  author          = "Doohwang Chang and Ganapati Bhat and Umit Ogras and
                     Bertan Bakkaloglu and Sule Ozev",
  title           = "Detection Mechanisms for Unauthorized Wireless
                     Transmissions",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "70:1--70:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3241046",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "With increasing diversity of supply chains from design
                     to delivery, there is an increasing risk that
                     unauthorized changes can be made within an IC. One of
                     the motivations for this type of change is to learn
                     important information (such as encryption keys,
                     spreading codes) from the hardware, and transmit this
                     information to a malicious party. To evade detection,
                     such unauthorized communication can be hidden within
                     legitimate bursts of transmit signal. In this article,
                     we present several signal processing techniques to
                     detect unauthorized transmissions which can be hidden
                     within the legitimate signal. We employ a scheme where
                     the legitimate transmission is configured to emit a
                     single sinusoidal waveform. We use time and spectral
                     domain analysis techniques to explore the transmit
                     spectrum. Since every transmission, no matter how low
                     the signal power is, must have a spectral signature, we
                     identify unauthorized transmission by eliminating the
                     desired signal from the spectrum after capture.
                     Experiment results show that when spread spectrum
                     techniques are used, the presence of an unauthorized
                     signal can be determined without the need for decoding
                     the malicious signal. The proposed detection techniques
                     need to be used as enhancements to the regular testing
                     and verification procedures if hardware security is a
                     concern.",
  acknowledgement = ack-nhfb,
  articleno       = "70",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Dong:2018:PAA,
  author          = "Xuan Dong and Lihong Zhang",
  title           = "{PV}-Aware Analog Sizing for Robust Analog Layout
                     Retargeting with Optical Proximity Correction",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "71:1--71:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3236624",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "For analog integrated circuits (ICs) in nanometer
                     technology nodes, process variation (PV) induced by
                     lithography may not only cause serious wafer pattern
                     distortion, but also result in device mismatch, which
                     can readily ruin circuit performance. Although the
                     conventional optical proximity correction (OPC)
                     operations can effectively improve the wafer image
                     fidelity, an analog circuit without robust device sizes
                     is still highly vulnerable to such a mismatch effect.
                     In this article, a PV-aware sizing-inclusive analog
                     layout retargeting framework, which encloses an
                     efficient hybrid OPC scheme for yield enhancement, is
                     proposed. The device sizes are tuned during the layout
                     retargeting process by using a deterministic
                     circuit-sizing algorithm considering PV conditions. Our
                     hybrid OPC method combines global rule-based OPC with
                     local model-based OPC functions to boost the wafer
                     image quality improvement but without degrading the
                     computational efficiency. The experimental results show
                     that our proposed framework can achieve the best wafer
                     image quality and circuit performance preservation
                     compared to any other alternative approaches.",
  acknowledgement = ack-nhfb,
  articleno       = "71",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Eslami:2018:RTC,
  author          = "Fatemeh Eslami and Steven J. E. Wilton",
  title           = "Rapid Triggering Capability Using an Adaptive Overlay
                     during {FPGA} Debug",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "72:1--72:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3241045",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "Field Programmable Gate Array (FPGA) technology is
                     rapidly gaining traction in a wide range of
                     applications. Nonetheless, FPGAs still require long
                     design and debug cycles. To debug hardware circuits,
                     trace-based instrumentation is inserted into the design
                     that enables capturing data during the circuit
                     execution into on-chip memories for later offline
                     analysis. Since on-chip memories are limited, a trigger
                     circuitry is used to only record data related to
                     specific events during the execution. However, during
                     debugging, a circuit recompilation is required on
                     modifying these instruments. This can be very slow,
                     reducing debug productivity. In this article, we
                     propose a non-intrusive and rapid triggering solution
                     with a tailored overlay fabric and mapping algorithm
                     that seeks to enable fast debug iterations without
                     performing a recompilation. This overlay is specialized
                     for small combinational and sequential circuits with a
                     single output; such circuits are typical of common
                     trigger functions. We present an adaptive strategy to
                     construct the overlay fabric using spare FPGA resources
                     at compile time. At debug time, our proposed trigger
                     mapping algorithms adapt to this specialized overlay to
                     rapidly implement combinational and sequential trigger
                     circuits. Our results show that the overlay fabric can
                     be reconfigured to map different triggering scenarios
                     in less than 40s instead of recompiling the circuit
                     during debug iterations, increasing debug
                     productivity.",
  acknowledgement = ack-nhfb,
  articleno       = "72",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Xiang:2018:FTU,
  author          = "Dong Xiang and Krishnendu Chakrabarty and Hideo
                     Fujiwara",
  title           = "Fault-Tolerant Unicast-Based Multicast for Reliable
                     Network-on-Chip Testing",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "73:1--73:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3243214",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "We present a unified test technique that targets
                     faults in links, routers, and cores of a
                     network-on-chip design based on test sessions. We call
                     an entire procedure, that delivers test packets to the
                     subset of routers/cores, a test session. Test delivery
                     for router/core testing is formulated as two
                     fault-tolerant multicast algorithms. Test packet
                     delivery for routers is implemented as a fault-tolerant
                     unicast-based multicast scheme via the fault-free links
                     and routers that were identified in the previous test
                     sessions to avoid packet corruption. A new
                     fault-tolerant routing algorithm is also proposed for
                     the unicast-based multicast core test delivery in the
                     whole network. Identical cores share the same test set,
                     and they are tested within the same test session.
                     Simulation results highlight the effectiveness of the
                     proposed method in reducing test time.",
  acknowledgement = ack-nhfb,
  articleno       = "73",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Topaloglu:2018:ETS,
  author          = "Rasit O. Topaloglu and Farinaz Koushanfar",
  title           = "Editorial for {TODAES} Special Issue on {Internet of
                     Things} System Performance, Reliability, and Security",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "74e:1--74e:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3276908",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  acknowledgement = ack-nhfb,
  articleno       = "74e",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Yang:2018:UUE,
  author          = "Kun Yang and Ulbert Botero and Haoting Shen and Damon
                     L. Woodard and Domenic Forte and Mark M. Tehranipoor",
  title           = "{UCR}: an Unclonable Environmentally Sensitive
                     Chipless {RFID} Tag For Protecting Supply Chain",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "74:1--74:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3264658",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "Chipless Radio Frequency Identification (RFID) tags
                     that do not include an integrated circuit (IC) in the
                     transponder are more appropriate for supply-chain
                     management of low-cost commodities and have been
                     gaining extensive attention due to their relatively
                     lower price. However, existing chipless RFID tags
                     consume considerable tag area and manufacturing
                     time/cost because of complex fabrication process (e.g.,
                     requiring removing or shorting some resonators on the
                     tag substrate to encode data). Worse still, their
                     identifiers (IDs) are deterministic, clonable, and
                     small in terms of bitwidth. To address these
                     shortcomings and help preserve the cold chain for
                     commodities (e.g., vaccines, pharmaceuticals, etc.)
                     sensitive to temperature, we develop a novel unclonable
                     environmentally sensitive chipless RFID (UCR) tag that
                     intrinsically generates a unique ID from both
                     manufacturing variations and ambient temperature
                     variation. A UCR tag consists of two parts: (i) a
                     certain number of concentric ring slot resonators
                     integrated on a certain laminate (e.g., TACONIC TLX-0),
                     whose resonance frequencies rely on geometric
                     parameters of slot resonators and dielectric constant
                     of substrate material that are sensitive to
                     manufacturing variations, and (ii) a stand-alone
                     circular ring slot resonator integrated on a particular
                     substrate (e.g., grease) that will be melted at a high
                     temperature, whose resonance frequency relies on
                     geometric parameters of slot resonator, dielectric
                     constant of substrate material, and ambient
                     temperature. UCR tags have the capability to track
                     commodities and their temperatures in the supply chain.
                     The area of UCR tag is comparable to regular quick
                     response (QR) code. Experimental results based on UCR
                     tag prototypes have verified their uniqueness and
                     reliability.",
  acknowledgement = ack-nhfb,
  articleno       = "74",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hussain:2018:SSH,
  author          = "Siam Umar Hussain and M. Sadegh Riazi and Farinaz
                     Koushanfar",
  title           = "{SHAIP}: {Secure Hamming Distance for Authentication
                     of Intrinsic PUFs}",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "75:1--75:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3274669",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "In this article, we present SHAIP, a secure Hamming
                     distance-based mutual authentication protocol. It
                     allows an unlimited number of authentications by
                     employing an intrinsic Physical Unclonable Function
                     (PUF). PUFs are being increasingly employed for remote
                     authentication of devices. Most of these devices have
                     limited resources. Therefore, the intrinsic PUFs are
                     most suitable for this task as they can be built with
                     little or no modification to the underlying hardware
                     platform. One major drawback of the current
                     authentication schemes is that they expose the PUF
                     response. This makes the intrinsic PUFs, which have a
                     limited number of challenge-response pairs, unusable
                     after a certain number of authentication sessions.
                     Moreover, these schemes are one way in the sense that
                     they only allow one party, the prover, to authenticate
                     herself to the verifier. We propose a symmetric mutual
                     authentication scheme based on secure
                     (privacy-preserving) computation of the Hamming
                     distance between the PUF response from the remote
                     device and reference response stored at the verifier
                     end. This allows both parties to authenticate each
                     other without revealing their respective sets of
                     inputs. We show that our scheme is effective with all
                     state-of-the-art intrinsic PUFs. The proposed scheme is
                     lightweight and does not require any modification to
                     the underlying hardware.",
  acknowledgement = ack-nhfb,
  articleno       = "75",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Winograd:2018:PGU,
  author          = "Ted Winograd and Gaurav Shenoy and Hassan Salmani and
                     Hamid Mahmoodi and Setareh Rafatirad and Houman
                     Homayoun",
  title           = "Programmable Gates Using Hybrid {CMOS--STT} Design to
                     Prevent {IC} Reverse Engineering",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "76:1--76:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3236622",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "This article presents a rigorous step towards
                     design-for-assurance by introducing a new class of
                     logically reconfigurable design resilient to design
                     reverse engineering. Based on the non-volatile spin
                     transfer torque (STT) magnetic technology, we introduce
                     a basic set of non-volatile reconfigurable
                     Look-Up-Table (LUT) logic components (NV-STT-based
                     LUTs). An STT-based LUT with a significantly different
                     set of characteristics compared to CMOS provides new
                     opportunities to enhance design security yet makes it
                     challenging to remain highly competitive with custom
                     CMOS or even SRAM-based LUT in terms of power,
                     performance, and area. To address these challenges, we
                     propose several algorithms to select and replace custom
                     CMOS gates with reconfigurable STT-based LUTs during
                     design implementation such that the functionality of
                     STT-based components and therefore the entire design
                     cannot be determined in any manageable time, rendering
                     any design reverse engineering attack ineffective. Our
                     study, conducted on a large number of standard circuit
                     benchmarks, concludes significant resiliency of hybrid
                     STT-CMOS circuits against various types of attacks.
                     Furthermore, the selection algorithms on average have a
                     small impact on the performance of the circuit. We also
                     tested these techniques against satisfiability attacks
                     developed recently and show that these techniques also
                     render more advanced reverse-engineering techniques
                     computationally infeasible.",
  acknowledgement = ack-nhfb,
  articleno       = "76",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Truong:2018:LSE,
  author          = "Anh Truong and S. Rasoul Etesami and Negar Kiyavash",
  title           = "Learning From Sleeping Experts: Rewarding Informative,
                     Available, and Accurate Experts",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "77:1--77:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3236617",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "We consider a generalized model of learning from
                     expert advice in which experts could abstain from
                     participating at some rounds. Our proposed online
                     algorithm falls into the class of weighted average
                     predictors and uses a time-varying multiplicative
                     weight update rule. This update rule changes the weight
                     of an expert based on his or her relative performance
                     compared to the average performance of available
                     experts at the current round. This makes the algorithm
                     suitable for recommendation systems in the presence of
                     an adversary with many potential applications in the
                     new emerging area of the Internet of Things. We prove
                     the convergence of our algorithm to the best expert,
                     defined in terms of both availability and accuracy, in
                     the stochastic setting. In particular, we show the
                     applicability of our definition of best expert through
                     convergence analysis of another well-known algorithm in
                     this setting. Finally, through simulation results on
                     synthetic and real datasets, we justify the
                     out-performance of our proposed algorithms compared to
                     the existing ones in the literature.",
  acknowledgement = ack-nhfb,
  articleno       = "77",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chopra:2018:OAC,
  author          = "Abhimanyu Chopra and Hakan Aydin and Setareh Rafatirad
                     and Houman Homayoun",
  title           = "Optimal Allocation of Computation and Communication in
                     an {IoT} Network",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "78:1--78:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3236623",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "Internet of things (IoT) is being developed for a wide
                     range of applications from home automation and personal
                     fitness to smart cities. With the extensive growth in
                     adaptation of IoT devices comes the uncoordinated and
                     substandard designs aimed at promptly making products
                     available to the end consumer. This substandard
                     approach restricts the growth of IoT in the near future
                     and necessitates that studies understand requirements
                     for an efficient design. A particular area where IoT
                     applications have grown significantly is surveillance
                     and monitoring. Applications of IoT in this domain are
                     relying on distributed sensors, each equipped with a
                     battery, capable of collecting images, processing
                     images, and communicating the raw or processed data to
                     the nearest node until it reaches the base station for
                     decision making. In such an IoT network where
                     processing can be distributed over the network, the
                     important research question is how much of data each
                     node should process and how much it should communicate
                     for a given objective. This work answers this question
                     and provides a deeper understanding of energy and delay
                     tradeoffs in an IoT network with three different target
                     metrics.",
  acknowledgement = ack-nhfb,
  articleno       = "78",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hussain:2018:PPP,
  author          = "Siam Umar Hussain and Farinaz Koushanfar",
  title           = "{P3}: Privacy Preserving Positioning for Smart
                     Automotive Systems",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "79:1--79:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3236625",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "This article presents the first privacy-preserving
                     localization method based on provably secure primitives
                     for smart automotive systems. Using this method, a car
                     that is lost due to unavailability of GPS can compute
                     its location with assistance from three nearby cars,
                     while the locations of all the participating cars
                     including the lost car remain private. Technological
                     enhancement of modern vehicles, especially in
                     navigation and communication, necessitates parallel
                     enhancement in security and privacy. Previous
                     approaches to maintaining user location privacy
                     suffered from one or more of the following drawbacks:
                     trade-off between accuracy and privacy, one-sided
                     privacy, and the need of a trusted third party that
                     presents a single point to attack. The localization
                     method presented here is one of the very first
                     location-based services that eliminates all these
                     drawbacks. Two protocols for computing the location are
                     presented here based on two Secure Function Evaluation
                     (SFE) techniques that allow multiple parties to jointly
                     evaluate a function on inputs that are encrypted to
                     maintain privacy. The first one is based on the
                     two-party protocol named Yao's Garbled Circuit (GC).
                     The second one is based on the Beaver-Micali-Rogaway
                     (BMR) protocol that allows inputs from more than two
                     parties. The two secure localization protocols exhibit
                     trade-offs between performance and resilience against
                     collusion. Along with devising the protocols, we design
                     and optimize netlists for the functions required for
                     location computation by leveraging conventional logic
                     synthesis tools with custom libraries optimized for
                     SFE. Proof-of-concept implementation of the protocol
                     shows that the complete operation can be performed
                     within only 355ms. The fast computing time enables
                     localization of even moving cars.",
  acknowledgement = ack-nhfb,
  articleno       = "79",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Muztoba:2018:IAI,
  author          = "Md Muztoba and Rohit Voleti and Fatih Karabacak and
                     Jaehyun Park and Umit Y. Ogras",
  title           = "Instinctive Assistive Indoor Navigation using
                     Distributed Intelligence",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "80:1--80:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3212720",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "Cyber-physical systems (CPS) and the Internet of
                     Things (IoT) offer a significant potential to improve
                     the effectiveness of assistive technologies for those
                     with physical disabilities. Practical assistive
                     technologies should minimize the number of inputs from
                     users to reduce their cognitive and physical effort.
                     This article presents an energy-efficient framework and
                     algorithm for assistive indoor navigation with
                     multi-modal user input. The goal of the proposed
                     framework is to simplify the navigation tasks and make
                     them more instinctive for the user. Our framework
                     automates indoor navigation using only a few user
                     commands captured through a wearable device. The
                     proposed methodology is evaluated using both a virtual
                     smart building and a prototype. The evaluations for
                     three different floorplans show one order of magnitude
                     reduction in user effort and communication energy
                     required for navigation, when compared to conventional
                     navigation methodologies that require continuous user
                     inputs.",
  acknowledgement = ack-nhfb,
  articleno       = "80",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Karabacak:2018:RDU,
  author          = "Fatih Karabacak and Umit Ogras and Sule Ozev",
  title           = "Remote Detection of Unauthorized Activity via Spectral
                     Analysis",
  journal         = j-TODAES,
  volume          = "23",
  number          = "6",
  pages           = "81:1--81:??",
  month           = dec,
  year            = "2018",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3276770",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "Unauthorized hardware or firmware modifications, known
                     as trojans, can steal information, drain the battery,
                     or damage IoT devices. Since trojans may be triggered
                     in the field at an unknown instance, it is important to
                     detect their presence at runtime. However, it is
                     difficult to run sophisticated detection algorithms on
                     these devices due to limited computational power and
                     energy and, in some cases, lack of accessibility. This
                     article presents a stand-off self-referencing technique
                     for detecting unauthorized activity. The proposed
                     technique processes involuntary electromagnetic
                     emissions on a separate hardware, which is physically
                     decoupled from the device under test. When the device
                     enters the test mode, a predefined test application is
                     run on the device repetitively for a known period. The
                     periodicity ensures that the spectral electromagnetic
                     power of the test application concentrates at known
                     frequencies, leaving the remaining frequencies within
                     the operating bandwidth at the noise level. Any
                     deviations from the noise level for these unoccupied
                     frequency locations indicate the presence of unknown
                     (unauthorized) activity. Hence, we are able to
                     differentiate trojan activity without using a golden
                     reference, or any knowledge of the attributes of the
                     trojan activity. Experiments based on hardware
                     measurements show that the proposed technique achieves
                     close to 100\% detection accuracy at up to 120cm
                     distance.",
  acknowledgement = ack-nhfb,
  articleno       = "81",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Lin:2019:QEO,
  author          = "Chun-Han Lin and Chih-Kai Kang and Pi-Cheng Hsiu",
  title           = "Quality-Enhanced {OLED} Power Savings on Mobile
                     Devices",
  journal         = j-TODAES,
  volume          = "24",
  number          = "1",
  pages           = "1:1--1:??",
  month           = jan,
  year            = "2019",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3243215",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "In the future, mobile systems will increasingly
                     feature more advanced organic light-emitting diode
                     (OLED) displays. The power consumption of these
                     displays is highly dependent on the image content.
                     However, existing OLED power-saving techniques either
                     change the visual experience of users or degrade the
                     visual quality of images in exchange for a reduction in
                     the power consumption. Some techniques attempt to
                     enhance the image quality by employing a compound
                     objective function. In this article, we present a
                     win-win scheme that always enhances the image quality
                     while simultaneously reducing the power consumption. We
                     define metrics to assess the benefits and cost for
                     potential image enhancement and power reduction. We
                     then introduce algorithms that ensure the
                     transformation of images into their quality-enhanced
                     power-saving versions. Next, the win-win scheme is
                     extended to process videos at a justifiable
                     computational cost. All the proposed algorithms are
                     shown to possess the win-win property without assuming
                     accurate OLED power models. Finally, the proposed
                     scheme is realized through a practical camera
                     application and a video camcorder on mobile devices.
                     The results of experiments conducted on a commercial
                     tablet with a popular image database and on a
                     smartphone with real-world videos are very encouraging
                     and provide valuable insights for future research and
                     practices.",
  acknowledgement = ack-nhfb,
  articleno       = "1",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Amir:2019:SPC,
  author          = "Maral Amir and Frank Vahid and Tony Givargis",
  title           = "Switching Predictive Control Using Reconfigurable
                     State-Based Model",
  journal         = j-TODAES,
  volume          = "24",
  number          = "1",
  pages           = "2:1--2:??",
  month           = jan,
  year            = "2019",
  CODEN           = "ATASFO",
  DOI             = "https://doi.org/10.1145/3267126",
  ISSN            = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L          = "1084-4309",
  bibdate         = "Fri Mar 22 16:58:40 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract        = "Advanced control methodologies have helped the
                     development of modern vehicles that are capable of path
                     planning and path following. For instance, Model
                     Predictive Control (MPC) employs a predictive model to
                     predict the behavior of the physical system for a
                     specific time horizon in the future. An optimization
                     problem is solved to compute optimal control actions
                     while handling model uncertainties and nonlinearities.
                     However, these prediction routines are computationally
                     intensive and the computational overhead grows with the
                     complexity of the model. Switching MPC addresses this
                     issue by combining multiple predictive models, each
                     with a different precision granularity. In this
                     article, we proposed a novel switching predictive
                     control method based on a model reduction scheme to
                     achieve various model granularities for path following
                     in autonomous vehicles. A state-based model with
                     tunable parameters is proposed to operate as a
                     reconfigurable predictive model of the vehicle. A
                     runtime switching algorithm is presented that selects
                     the best model using machine learning. We employed a
                     metric that formulates the tradeoff between the error
                     and computational savings due to model reduction. Our
                     simulation results show that the use of the predictive
                     model in the switching scheme as opposed to single
                     granularity scheme, yields a 45\% decrease in execution
                     time in tradeoff for a small 12\% loss in accuracy in
                     prediction of future outputs and no loss of accuracy in
                     tracking the reference trajectory.",
  acknowledgement = ack-nhfb,
  articleno       = "2",
  fjournal        = "ACM Transactions on Design Automation of Electronic
                     Systems",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Erol:2019:KSB,
  author =       "Osman Emir Erol and Sule Ozev",
  title =        "Knowledge- and Simulation-Based Synthesis of
                 Area-Efficient Passive Loop Filter Incremental
                 {Zoom-ADC} for Built-In Self-Test Applications",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3266227",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "We propose a fully differential, synthesizable
                 zoom-ADC architecture with a passive loop filter for
                 low-frequency Built-In Self-Test (BIST) applications,
                 along with a synthesis tool that can target various
                 design specifications. We present the detailed ADC
                 architecture and a step-by-step process for designing
                 the zoom-ADC. The design flow does not rely on the
                 extensive knowledge of an experienced ADC designer. Two
                 ADCs have been synthesized with different performance
                 requirements in the 65nm CMOS process. The first ADC
                 achieves a 90.4dB Signal-to-Noise Ratio (SNR) in 512
                 $ \mu $s measurement time and consumes 17 $ \mu $W
                 power. The second design achieves a 78.2dB SNR in 31.25
                 $ \mu $s measurement time and consumes 63 $ \mu $W
                 power.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2019:SAT,
  author =       "Yukai Chen and Sara Vinco and Enrico Macii and Massimo
                 Poncino",
  title =        "{SystemC-AMS} Thermal Modeling for the Co-simulation
                 of Functional and Extra-Functional Properties",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3267125",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Temperature is a critical property of smart systems,
                 due to its impact on reliability and to its
                 inter-dependence with power consumption. Unfortunately,
                 the current design flows evaluate thermal evolution
                 ex-post on offline power traces. This does not allow to
                 consider temperature as a dimension in the design loop,
                 and it misses all the complex inter-dependencies with
                 design choices and power evolution. In this article, by
                 adopting the functional language SystemC-AMS (Analog
                 Mixed Signal), we propose a method to enable
                 thermal/power/functional co-simulation. The system
                 thermal model is built by using state-of-the-art
                 circuit equivalent models, by exploiting the support
                 for electrical linear networks intrinsic of
                 SystemC-AMS. The experimental results will show that
                 the choice of SystemC-AMS is a winning strategy for
                 building a simultaneous simulation of multiple
                 functional and extra-functional properties of a system.
                 The generated code exposes an accuracy comparable to
                 that of the reference thermal simulator HotSpot.
                 Additionally, the initial overhead due to the general
                 purpose nature of SystemC-AMS is compensated by the
                 surprisingly high performance of transient simulation,
                 with speedups as high as two orders of magnitude.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Song:2019:HRB,
  author =       "Yang Song and Olivier Alavoine and Bill Lin",
  title =        "Harvesting Row-Buffer Hits via Orchestrated Last-Level
                 Cache and {DRAM} Scheduling for Heterogeneous Multicore
                 Systems",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3269982",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In heterogeneous multicore systems, the memory
                 subsystem, including the last-level cache and DRAM, is
                 widely shared among the CPU, the GPU, and the real-time
                 cores. Due to their distinct memory traffic patterns,
                 heterogeneous cores result in more frequent cache
                 misses at the last-level cache. As cache misses travel
                 through the memory subsystem, two schedulers are
                 involved for the last-level cache and DRAM,
                 respectively. Prior studies treated the scheduling of
                 the last-level cache and DRAM as independent stages.
                 However, with no orchestration and limited visibility
                 of memory traffic, neither scheduling stage is able to
                 ensure optimal scheduling decisions for memory
                 efficiency. Unnecessary precharges and row activations
                 happen in DRAM when the memory scheduler is ignorant of
                 incoming cache misses, and DRAM row-buffer states are
                 invisible to the last-level cache. In this article, we
                 propose a unified memory controller for the
                 last-level cache and DRAM with orchestrated schedulers.
                 The memory scheduler harvests row-buffer hit
                 opportunities in cache request buffers during spare
                 time without inducing significant implementation cost.
                 We further introduce a dynamic orchestrated scheduling
                 policy to improve memory efficiency while achieving
                 target CPU IPC. Extensive evaluations show that the
                 proposed controller improves the total memory bandwidth
                 of DRAM by 16.8\% on average and saves DRAM energy by
                 up to 29.7\% while achieving comparable CPU IPCs. With
                 the dynamic scheduling policy, the unified controller
                 achieves the same IPC as the conventional design and
                 increases DRAM bandwidth by 9.2\%. In addition, we
                 explore the potential of the proposed memory controller
                 to attain improvements on both memory bandwidth and CPU
                 IPC.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Choi:2019:OFT,
  author =       "Junchul Choi and Hoeseok Yang and Soonhoi Ha",
  title =        "Optimization of Fault-Tolerant Mixed-Criticality
                 Multi-Core Systems with Enhanced {WCRT} Analysis",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3275154",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "This article proposes a novel optimization technique
                 of fault-tolerant mixed-criticality multi-core systems
                 with worst-case response time (WCRT) guarantees.
                 Typically, in fault-tolerant multi-core systems, tasks
                 can be replicated or re-executed in order to enhance
                 the reliability. In addition, based on the policy of
                 mixed-criticality scheduling, low-criticality tasks can
                 be dropped at runtime. Such uncertainties caused by
                 hardening and mixed-criticality scheduling make WCRT
                 analysis very difficult. We show that previous analysis
                 techniques are pessimistic as they consider avoidably
                 extreme cases that can be safely ignored within the
                 given reliability constraint. We improve the analysis
                 in order to tighten the pessimism of WCRT estimates by
                 considering the maximum number of faults to be
                 tolerated. Further, we improve the mixed-criticality
                 scheduling by allowing partial dropping of
                 low-criticality tasks. On top of those, we explore the
                 design space of hardening, task-to-core mapping, and
                 quality-of-service of the multi-core mixed-criticality
                 systems. The effectiveness of the proposed technique is
                 verified by extensive experiments with synthetic and
                 real-life benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2019:BFB,
  author =       "Irith Pomeranz",
  title =        "Boundary-Functional Broadside and Skewed-Load Tests",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3276976",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Close-to-functional broadside tests are used for
                 avoiding overtesting of delay faults that can result
                 from non-functional operation conditions, while
                 avoiding test escapes because of faults that cannot be
                 detected under functional operation conditions. When a
                 close-to-functional broadside test deviates from
                 functional operation conditions, the deviation can
                 affect the entire circuit. This article defines the
                 concept of a boundary-functional broadside test where
                 non-functional operation conditions are prevented from
                 crossing a preselected boundary. Using the procedure
                 described in this article, the boundary maintains the
                 same values under a boundary-functional broadside test
                 as under a functional broadside test from which it is
                 derived. Indirectly, this ensures that the deviations
                 from functional operation conditions throughout the
                 entire circuit are limited. The concept of a
                 boundary-functional broadside test is extended to
                 skewed-load tests, and to partial-boundary-functional
                 tests. Experimental results are presented for benchmark
                 circuits to demonstrate the fault coverage improvements
                 that can be achieved using boundary-functional
                 broadside and skewed-load tests as well as
                 partial-boundary-functional tests of both types.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2019:SEA,
  author =       "Jiajun Li and Guihai Yan and Wenyan Lu and Shijun Gong
                 and Shuhao Jiang and Jingya Wu and Xiaowei Li",
  title =        "{SynergyFlow}: an Elastic Accelerator Architecture
                 Supporting Batch Processing of Large-Scale Deep Neural
                 Networks",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3275243",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Neural networks (NNs) have achieved great success in a
                 broad range of applications. As NN-based methods are
                 often both computation and memory intensive,
                 accelerator solutions have been proved to be highly
                 promising in terms of both performance and energy
                 efficiency. Although prior solutions can deliver high
                 computational throughput for convolutional layers, they
                 could incur severe performance degradation when
                 accommodating the entire network model, because there
                 exist very diverse computing and memory bandwidth
                 requirements between convolutional layers and fully
                 connected layers and, furthermore, among different NN
                 models. To overcome this problem, we proposed an
                 elastic accelerator architecture, called SynergyFlow,
                 which intrinsically supports layer-level and
                 model-level parallelism for large-scale deep neural
                 networks. SynergyFlow boosts the resource utilization
                 by exploiting the complementary effect of resource
                 demanding in different layers and different NN models.
                 SynergyFlow can dynamically reconfigure itself
                 according to the workload characteristics, maintaining
                 a high performance and high resource utilization among
                 various models. As a case study, we implement
                 SynergyFlow on a P395-AB FPGA board. Under 100MHz
                 working frequency, our implementation improves the
                 performance by 33.8\% on average (up to 67.2\% on
                 AlexNet) compared to comparable provisioned previous
                 architectures.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Smirnov:2019:AOV,
  author =       "Fedor Smirnov and Felix Reimann and J{\"u}rgen Teich
                 and Michael Gla{\ss}",
  title =        "Automatic Optimization of the {VLAN} Partitioning in
                 Automotive Communication Networks",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3278120",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Dividing the communication network into so-called
                 Virtual Local Area Networks (VLANs), i.e., subnetworks
                 that are isolated at the data link layer (OSI layer 2),
                 is a promising approach to address the increasing
                 security challenges in automotive networks. The
                 automation of the VLAN partitioning is a
                 well-researched problem in the domain of local or
                 metropolitan area networks. However, the approaches
                 used there are hardly applicable for the design of
                 automotive networks as they mainly focus on reducing
                 the amount of broadcast traffic and cannot capture the
                 many design objectives of automotive networks like the
                 message timing or the link load, which are affected by
                 the VLAN partitioning. As a remedy, this article
                 proposes an approach based on a set of Pseudo-Boolean
                 constraints to generate a message routing which is
                 feasible with respect to the VLAN-related routing
                 restrictions in automotive networks. This approach can
                 be used for a design space exploration to optimize not
                 only the VLAN partitioning but also other
                 routing-related objectives. We demonstrate both the
                 efficiency of our message routing approach and the now
                 accessible optimization potential for the complete
                 Electric/Electronic architecture with a
                 mixed-criticality system from the automotive domain.
                 There we thoroughly investigate the impact of the VLAN
                 partitioning on the message timing and the link loads
                 by optimizing these design objectives concurrently.
                 During the exploration of the huge design space, where
                 each resource can be assigned to one of four VLANs, our
                 approach requires less than 40ms for the creation of a
                 valid solution and ensures that all messages satisfy
                 their deadlines and link load bounds.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Huang:2019:ILA,
  author =       "Bo-Yuan Huang and Hongce Zhang and Pramod Subramanyan
                 and Yakir Vizel and Aarti Gupta and Sharad Malik",
  title =        "Instruction-Level Abstraction {(ILA)}: a Uniform
                 Specification for System-on-Chip {(SoC)} Verification",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3282444",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Modern Systems-on-Chip (SoC) designs are increasingly
                 heterogeneous and contain specialized semi-programmable
                 accelerators in addition to programmable processors. In
                 contrast to the pre-accelerator era, when the ISA
                 played an important role in verification by enabling a
                 clean separation of concerns between software and
                 hardware, verification of these ``accelerator-rich''
                 SoCs presents new challenges. From the perspective of
                 hardware designers, there is a lack of a common
                 framework for formal functional specification of
                 accelerator behavior. From the perspective of software
                 developers, there exists no unified framework for
                 reasoning about software/hardware interactions of
                 programs that interact with accelerators. This article
                 addresses these challenges by providing a formal
                 specification and high-level abstraction for
                 accelerator functional behavior. It formalizes the
                 concept of an Instruction Level Abstraction (ILA),
                 developed informally in our previous work, and shows
                 its application in modeling and verification of
                 accelerators. This formal ILA extends the familiar
                 notion of instructions to accelerators and provides a
                 uniform, modular, and hierarchical abstraction for
                 modeling software-visible behavior of both accelerators
                 and programmable processors. We demonstrate the
                 applicability of the ILA through several case studies
                 of accelerators (for image processing, machine
                 learning, and cryptography), and a general-purpose
                 processor (RISC-V). We show how the ILA model
                 facilitates equivalence checking between two ILAs, and
                 between an ILA and its hardware finite-state machine
                 (FSM) implementation. Further, this equivalence
                 checking supports accelerator upgrades using the notion
                 of ILA compatibility, similar to processor upgrades
                 using ISA compatibility.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Carpent:2019:RAS,
  author =       "Xavier Carpent and Norrathep Rattanavipanon and Gene
                 Tsudik",
  title =        "Remote Attestation via Self-Measurement",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3279950",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Remote attestation (RA) is a popular means of
                 detecting malware in embedded and IoT devices. RA is
                 usually realized as an interactive protocol, whereby a
                 trusted party (verifier) measures software integrity
                 of a potentially compromised remote device (prover).
                 Early work focused on purely software-based and fully
                 hardware-based techniques, neither of which is ideal
                 for low-end embedded devices. More recent results
                 yielded hybrid (SW/HW) architectures with a minimal set
                 of features to support efficient and secure RA on
                 low-end devices. All prior techniques require on-demand
                 operation, i.e., RA is performed in real time. We
                 identify some drawbacks of this general approach in the
                 context of unattended devices: First, it fails to
                 detect mobile malware that enters and leaves prover
                 between successive RA instances. Second, it requires
                 prover to engage in a potentially expensive (in terms
                 of time and energy) computation, which can be harmful
                 for mission-critical or real-time devices. To address
                 these drawbacks, we introduce the concept of
                 self-measurement, whereby prover periodically and
                 securely measures and records its own software state,
                 based on a pre-established schedule. A (possibly
                 untrusted) verifier occasionally collects and verifies
                 these measurements. We present the design of a concrete
                 technique, called Efficient Remote Attestation via
                 Self-Measurement for Unattended Settings (ERASMUS),
                 justify its features and evaluate its performance. In
                 the process, we also define a new metric, Quality of
                 Attestation (QoA). We believe that ERASMUS is well
                 suited for time-sensitive and/or safety-critical
                 applications that are not served well by on-demand RA.
                 Finally, we show that ERASMUS is a promising stepping
                 stone toward handling attestation of multiple devices
                 (i.e., a group or swarm) with high mobility.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Tan:2019:EMI,
  author =       "Jingweijia Tan and Kaige Yan",
  title =        "Efficiently Managing the Impact of Hardware
                 Variability on {GPUs}' Streaming Processors",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3287308",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Graphics Processing Units (GPUs) are widely used in
                 general-purpose high-performance computing fields due
                 to their highly parallel architecture. In recent years,
                 a new era with the nanometer scale integrated circuit
                 manufacture process has come. As a consequence, GPUs'
                 computation capability gets even stronger. However, as
                 process technology scales down, hardware variability,
                 e.g., process variations (PVs) and negative bias
                 temperature instability (NBTI), has a higher impact on
                 the chip quality. The parallelism of GPU desires high
                 consistency of hardware units on chip; otherwise, the
                 worst unit will inevitably become the bottleneck. So
                 the hardware variability becomes a pressing concern to
                 further improve GPUs' performance and lifetime, not
                 only in integrated circuit fabrication, but more in GPU
                 architecture design. Streaming Processors (SPs) are the
                 key units in GPUs, which perform most of parallel
                 computing operations. Therefore, in this work, we focus
                 on mitigating the impact of hardware variability in GPU
                 SPs. We first model and analyze SPs' performance
                 variations under hardware variability. Then, we observe
                 that both PV and NBTI have a large impact on SPs'
                 performance. We further observe unbalanced SP
                 utilization, e.g., some SPs are idle when others are
                 active, during program execution. Leveraging this
                 observation, we propose a Hardware Variability-aware
                 SPs' Management policy (HVSM), which dynamically
                 dispatches computation in appropriate SPs to balance
                 the utilizations. In addition, we find that a large
                 portion of compute operations are duplicate. We also
                 propose an Operation Compression (OC) technique to
                 minimize the unnecessary computations to further
                 mitigate the hardware variability effects. Our
                 experimental results show the combined HVSM and OC
                 technique effectively reduces the impact of hardware
                 variability, which can translate to 37\% performance
                 improvement or 18.3\% lifetime extension for a GPU
                 chip.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Kang:2019:TDF,
  author =       "Ilgweon Kang and Fang Qiao and Dongwon Park and Daniel
                 Kane and Evangeline Fung Yu Young and Chung-Kuan Cheng
                 and Ronald Graham",
  title =        "Three-dimensional Floorplan Representations by Using
                 Corner Links and Partial Order",
  journal =      j-TODAES,
  volume =       "24",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3289179",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "Three-dimensional integrated circuit (3D IC)
                 technology offers a potential breakthrough to enable a
                 paradigm-shift strategy, called ``more than Moore,''
                 with novel features and advantages over the
                 conventional 2D process technology. By having
                 three-dimensional interconnections, 3D IC provides
                 substantial wirelength reduction and a massive amount
                 of bandwidth, which gives significant performance
                 improvement to overcome many of the nontrivial
                 challenges in semiconductor industry. Moreover, 3D
                 integration technology enables to stack disparate
                 technologies with various functionalities into a single
                 system-in-package (SiP), introducing ``true 3D IC''
                 design. As the first physical design (PD) step, IC
                 floorplanning takes a crucial role to determine IC's
                 overall design qualities such as footprint area, timing
                 closure, power distribution, thermal management, and so
                 on. However, lack of efficient 3D floorplanning
                 algorithms that practically implement advantages of 3D
                 integration technology is a critical bottleneck for PD
                 automation of 3D IC design and implementation. 3D
                 floorplanning (or packing, block partitioning) is a
                 well-known NP-hard problem, and most of 3D
                 floorplanning algorithms rely on heuristics and
                 iterative improvements. Thus, developing complete and
                 efficient 3D floorplan representations is important,
                 since floorplan representation provides the foundation
                 of data structure to search the solution space for 3D
                 IC floorplanning. A well-defined floorplan
                 representation provides a well-organized and
                 cost-effective methodology to design high-performance
                 3D IC. We propose a new 3D IC floorplan representation
                 methodology using corner links and partial order. Given
                 a fixed number of cuboidal blocks and their volume,
                 algorithmic 3D floorplan representations describe
                 topological structure and physical
                 positions/orientations of each block relative to the
                 origin in the 3D floorplan space. In this article, (1)
                 we introduce our novel 3D floorplan representation,
                 called corner links representation, (2) we analyze the
                 equivalence relation between the corner links
                 representation and its corresponding partial order
                 representation, and (3) we discuss several key
                 properties of the corner links representation and
                 partial order representation. The corner links
                 representation provides a complete and efficient
                 structure to assemble the original 3D mosaic floorplan.
                 Also, the corner links representation for the
                 non-degenerate 3D mosaic floorplan can be equivalently
                 expressed by the four trees representation. The partial
                 order representation defines the topological structure
                 of the 3D floorplan with three transitive closure
                 graphs (TCG) for each direction and captures all
                 stitching planes in the 3D floorplan in the order of
                 their respective directions. We demonstrate that the
                 corner links representation can be reduced to its
                 corresponding partial order representation, indicating
                 that the corner links representation shares
                 well-defined and -studied features/properties of 3D
                 TCG-based floorplan representation. If the partial
                 order representation describes relations between any
                 pairs of blocks in the 3D floorplan, then the floorplan
                 is a valid floorplan. We show that the partial order
                 representation can restore the absolute coordinates of
                 all blocks in the 3D mosaic floorplan by using the
                 given physical dimensions of blocks.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gong:2019:PEH,
  author =       "Yanping Gong and Fengyu Qian and Lei Wang",
  title =        "Probabilistic Evaluation of Hardware Security
                 Vulnerabilities",
  journal =      j-TODAES,
  volume =       "24",
  number =       "2",
  pages =        "14:1--14:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3290405",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3290405",
  abstract =     "Various design techniques can be applied to implement
                 the finite state machine (FSM) functions in order to
                 optimize timing, performance, power, and to reduce
                 overhead. Recently, malicious attacks to hardware
                 systems have emerged as a critical problem. Fault
                 injection attacks, in particular, alter the function or
                 reveal the critical information of a hardware system
                 through precisely controlled fault injection processes.
                 Attackers can utilize the loopholes and vulnerabilities
                 of FSM functions to access the states that are under
                 protection. A probabilistic model is developed in this
                 article to evaluate the potential vulnerabilities of
                 FSM circuits at the design stage. Analysis based on the
                 statistical behaviors of FSM also shows that the
                 induced circuit errors can be exploited to access the
                 protected states. An effective solution based on state
                 re-encoding is proposed to minimize the risk of
                 unauthorized transitions. Simulation results
                 demonstrate that vulnerable transition paths can be
                 protected with small hardware overheads.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Zheng:2019:HEB,
  author =       "Jianwei Zheng and Chao Lu and Jiefeng Guo and Deming
                 Chen and Donghui Guo",
  title =        "A Hardware-Efficient Block Matching Algorithm and Its
                 Hardware Design for Variable Block Size Motion
                 Estimation in Ultra-High-Definition Video Encoding",
  journal =      j-TODAES,
  volume =       "24",
  number =       "2",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3290408",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3290408",
  abstract =     "Variable block size motion estimation has contributed
                 greatly to achieving an optimal interframe encoding,
                 but involves high computational complexity and huge
                 memory access, which is the most critical bottleneck in
                 ultra-high-definition video encoding. This article
                 presents a hardware-efficient block matching algorithm
                 with an efficient hardware design that is able to
                 reduce the computational complexity of motion
                 estimation while providing a sustained and steady
                 coding performance for high-quality video encoding. A
                 three-level memory organization is proposed to reduce
                 memory bandwidth requirement while supporting a
                 predictive common search window. By applying multiple
                 search strategies and early termination, the proposed
                 design provides 1.8 to 3.7 times higher hardware
                 efficiency than other works. Furthermore, on-chip
                 memory has been reduced by 96.5\% and off-chip
                 bandwidth requirement has been reduced by 39.4\% thanks
                 to the proposed three-level memory organization. The
                 corresponding power consumption is only 198mW at the
                 highest working frequency of 500MHz. The proposed
                 design is attractive for high-quality video encoding in
                 real-time applications with low power consumption.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bakhshalipour:2019:RWT,
  author =       "Mohammad Bakhshalipour and Aydin Faraji and Seyed
                 Armin Vakil Ghahani and Farid Samandi and Pejman
                 Lotfi-Kamran and Hamid Sarbazi-Azad",
  title =        "Reducing Writebacks Through In-Cache Displacement",
  journal =      j-TODAES,
  volume =       "24",
  number =       "2",
  pages =        "16:1--16:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3289187",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Mar 22 16:58:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3289187",
  abstract =     "Non-Volatile Memory (NVM) technology is a promising
                 solution to fulfill the ever-growing need for higher
                 capacity in the main memory of modern systems. Despite
                 having many great features, however, NVM's poor write
                 performance remains a severe obstacle, preventing it
                 from being used as a DRAM alternative in the main
                 memory. Most of the prior work targeted optimizing
                 writes at the main memory side and neglected the
                 decisive role of upper-level cache management policies
                 on reducing the number of writes. In this article, we
                 propose a novel cache management policy that attempts
                 to maximize write-coalescing in the on-chip SRAM
                 last-level cache (LLC) for the sake of reducing the
                 number of costly writes to the off-chip NVM. We
                 decouple a few physical ways of the LLC to have a
                 dedicated and exclusive storage for the dirty blocks
                 after being evicted from the cache and before being
                 sent to the off-chip memory. By displacing dirty blocks
                 in exclusive storage, they are kept in the cache based
                 on their rewrite distance and are evicted when they are
                 unlikely to be reused shortly. To maximize the
                 effectiveness of exclusive storage, we manage it as a
                 Cuckoo Cache to offer associativity based on the
                 various applications' demands. Through detailed
                 evaluations targeting various single- and
                 multi-threaded applications, we show that our proposal
                 reduces the number of writebacks by 21\%, on average,
                 over the state-of-the-art method and enhances both
                 performance and energy efficiency.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Bhowmik:2019:PAT,
author = "Biswajit Bhowmik and Jatindra Kumar Deka and Santosh
Biswas and Bhargab B. Bhattacharya",
title = "Performance-Aware Test Scheduling for Diagnosing
Coexistent Channel Faults in Topology-Agnostic
Networks-on-Chip",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "17:1--17:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3291532",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3291532",
abstract = "High-performance multiprocessor SoCs used in practice
require a complex network-on-chip (NoC) as
communication architecture, and the channels therein
often suffer from various manufacturing defects. Such
physical defects cause a multitude of system-level
failures and subsequent degradation of reliability,
yield, and performance of the computing platform. Most
of the existing test approaches consider mesh-based NoC
channels only and do not perform well for other regular
topologies such as octagons or spidergons, with regard
to test time and overhead issues. This article proposes
a topology-agnostic test mechanism that is capable of
diagnosing on-line, coexistent channel-short, and
stuck-at faults in these special NoCs as well as in
traditional mesh architectures. We introduce a new test
model called Damaru to decompose the network and
present an efficient scheduling scheme to reduce test
time without compromising resource utilization during
testing. Additionally, the proposed scheduling scheme
scales well with network size, channel width, and
topological diversity. Simulation results show that the
method achieves nearly 92\% fault coverage and improves
area overhead by almost 60\% and test time by 98\%
compared to earlier approaches. As a sequel, packet
latency and energy consumption are also improved by
67.05\% and 54.69\%, respectively, and they are further
improved with increasing network size.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pourshirazi:2019:WAL,
author = "Bahareh Pourshirazi and Majed Valad Beigi and Zhichun
Zhu and Gokhan Memik",
title = "Writeback-Aware {LLC} Management for {PCM-Based} Main
Memory Systems",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "18:1--18:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3292009",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3292009",
abstract = "With the increase in the number of data-intensive
applications on today's workloads, DRAM-based main
memories are struggling to satisfy the growing data
demand capacity. Phase Change Memory (PCM) is a type of
non-volatile memory technology that has been explored
as a promising alternative for DRAM-based main memories
due to its better scalability and lower leakage energy.
Despite its many advantages, PCM also has shortcomings
such as long write latency, high write energy
consumption, and limited write endurance, which are all
related to the write operations. In this article, we
propose a novel writeback-aware Last Level Cache (LLC)
management scheme named WALL to reduce the number of
LLC writebacks and consequently improve performance,
energy efficiency, and lifetime of a PCM-based main
memory system. First, we investigate the writeback
behavior of LLC sets and show that writebacks are not
uniformly distributed among sets; some sets observe
much higher writeback rates than others. We then
propose a writeback-aware set-balancing mechanism,
which employs the underutilized LLC sets with few
writebacks as an auxiliary storage for the evicted
dirty lines from sets with frequent writebacks. We also
propose a simple and effective writeback-aware
replacement policy to avoid the eviction of the dirty
blocks that are highly reused after being evicted from
the cache. Our experimental results show that WALL
achieves an average of 30.9\% reduction in the total
number of LLC writebacks, compared to the baseline
scheme, which uses the LRU replacement policy. As a
result, WALL can reduce the memory energy consumption
by 23.1\% and enhance PCM lifetime by $ 1.29 \times $,
on average, on an 8-core system with a 4GB PCM main
memory, running memory-intensive applications.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Muhammad:2019:RBS,
author = "Shaheer Muhammad and M. Usman Rafique and Shuai Li and
Zili Shao and Qixin Wang and Xue Liu",
title = "Reconfigurable Battery Systems: a Survey on Hardware
Architecture and Research Challenges",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "19:1--19:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3301301",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3301301",
abstract = "In a reconfigurable battery pack, the connections
among cells can be changed during operation to form
different configurations. This can lead a battery, a
passive two-terminal device, to a smart battery that
can reconfigure itself according to the requirement to
enhance operational performance. Several hardware
architectures with different levels of complexities
have been proposed. Some researchers have used existing
hardware and demonstrated improved performance on the
basis of novel optimization and scheduling algorithms.
The possibility of software techniques to benefit the
energy storage systems is exciting, and it is the
perfect time for such methods as the need for
high-performance and long-lasting batteries is on the
rise. This novel field requires new understanding,
principles, and evaluation metrics of proposed schemes.
In this article, we systematically discuss and
critically review the state of the art. This is the
first effort to compare the existing hardware
topologies in terms of flexibility and functionality.
We provide a comprehensive review that encompasses all
existing research works, starting from the details of
the individual battery including modeling and
properties as well as fixed-topology traditional
battery packs. To stimulate further research in this
area, we highlight key challenges and open problems in
this domain.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Sahoo:2019:FMV,
author = "Debiprasanna Sahoo and Swaraj Sha and Manoranjan
Satpathy and Madhu Mutyam and S. Ramesh and Partha
Roop",
title = "Formal Modeling and Verification of a Victim {DRAM}
Cache",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "20:1--20:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3306491",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3306491",
abstract = "The emerging Die-stacking technology enables DRAM to
be used as a cache to break the ``Memory Wall''
problem. Recent studies have proposed to use DRAM as a
victim cache in both CPU and GPU memory hierarchies to
improve performance. DRAM caches are large in size and,
hence, when realized as a victim cache, non-inclusive
design is preferred. This non-inclusive design adds
significant differences to the conventional DRAM cache
design in terms of its probe, fill, and writeback
policies. Design and verification of a victim DRAM
cache can be much more complex than that of a
conventional DRAM cache. Hence, without rigorous
modeling and formal verification, ensuring the
correctness of such a system can be difficult. The
major focus of this work is to show how formal modeling
is applied to design and verify a victim DRAM cache. In
this approach, we identify the agents in the victim
DRAM cache design and model them in terms of
interacting state machines. We derive a set of
properties from the specifications of a victim cache
and encode them using Linear Temporal Logic. The
properties are then proven using symbolic and bounded
model checking. Finally, we discuss how these
properties are related to the dataflow paths in a
victim DRAM cache.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Gupta:2019:DAD,
author = "Ankur Gupta and Juinn-Dar Huang and Shigeru Yamashita
and Sudip Roy",
title = "Design Automation for Dilution of a Fluid Using
Programmable Microfluidic Device-Based Biochips",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "21:1--21:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3306492",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3306492",
abstract = "Microfluidic lab-on-a-chip has emerged as a new
technology for implementing biochemical protocols on
small-sized portable devices targeting low-cost medical
diagnostics. Among various efforts of fabrication of
such chips, a relatively new technology is a
programmable microfluidic device (PMD) for
implementation of flow-based lab-on-a-chip. A PMD chip
is suitable for automation due to its symmetric nature.
In order to implement a bioprotocol on such a
reconfigurable device, it is crucial to automate a
sample preparation on-chip as well. In this article, we
propose a dilution PMD algorithm (namely DPMD) and its
architectural mapping scheme (namely generalized
architectural mapping algorithm (GAMA)) for
addressing fluidic cells of such a device to perform
dilution of a reagent fluid on-chip. We used an
optimization function that first minimizes the number
of mixing steps and then reduces the waste generation
and further reagent requirement. Simulation results
show that the proposed DPMD scheme is comparative to
the existing state-of-the-art dilution algorithm. The
proposed design automation using the architectural
mapping scheme reduces the required chip area and,
hence, minimizes the valve switching that, in turn,
increases the life span of the PMD-chip.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Jung:2019:ILP,
author = "Jinwook Jung and Gi-Joon Nam and Woohyun Chung and
Youngsoo Shin",
title = "Integrated Latch Placement and Cloning for Timing
Optimization",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "22:1--22:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3301613",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3301613",
abstract = "This article presents an algorithm for integrated
timing-driven latch placement and cloning. Given a
circuit placement, the proposed algorithm relocates
some latches while circuit timing is improved. Some
latches are replicated to further improve the timing;
the number of replicated latches along with their
locations are automatically determined. After latch
cloning, each of the replicated latches is set to drive
a subset of the fanouts that have been driven by the
original single latch. The proposed algorithm is then
extended such that relocation and cloning are applied
to some latches together with their neighbor logic
gates. Experimental results demonstrate that the worst
negative slack and the total negative slack are
improved by 24\% and 59\%, respectively, on average of
test circuits. The negative impacts on circuit area and
power consumption are both marginal, at 0.7\% and 1.9\%
respectively.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Pomeranz:2019:ITU,
author = "Irith Pomeranz",
title = "Incomplete Tests for Undetectable Faults to Improve
Test Set Quality",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "23:1--23:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3306493",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3306493",
abstract = "The presence of undetectable faults in a set of target
faults implies that tests, which may be important for
detecting defects, are missing from the test set. This
article suggests an approach for addressing missing
tests that fits with the rationale for computing an
$n$-detection test set. The article defines the concept
of an incomplete test that is relevant when a target
fault is undetectable. An incomplete test activates the
fault but fails to detect it because of one or more
assignments that are missing from the test. The
procedure described in this article improves the
quality of a test set by attempting to ensure that
every undetectable fault has $n$ incomplete tests with
the smallest possible numbers of missing assignments,
for a constant $ n \geq 1 $. The incomplete tests are
expected to contribute to the detection of detectable
defects around the site of the undetectable fault. The
computation of missing assignments for a test is
performed in linear time by avoiding fault simulation
and considering all the undetectable faults
simultaneously. Experimental results demonstrate the
extent to which a given test set can be improved
without increasing the number of tests.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Hyun:2019:IAA,
author = "Daijoon Hyun and Youngsoo Shin",
title = "Integrated Approach of Airgap Insertion for Circuit
Timing Optimization",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "24:1--24:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3306494",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3306494",
abstract = "Airgap technology enables air to be introduced in
inter-metal dielectric (IMD). Airgap between certain
wires reduces coupling capacitance due to the reduced
permittivity; this can be utilized to decrease circuit
delay. We propose an integrated approach of airgap
insertion with the goal of circuit timing optimization.
It consists of three sub-problems. We first select the
layers that employ airgap, called airgap layers, that
maximize total negative slack (TNS) improvement; this
yields TNS improvement of 7\% to 15\% and worst
negative slack (WNS) improvement of 2\% to 8\%,
compared to a simple assumption of airgap layers.
Second, we reassign the layers of wires such that more
wires on critical paths can be placed in airgap layers.
This is formulated as integer linear programming (ILP),
and a more practical heuristic algorithm is also
proposed. It provides an additional 17\% TNS
improvement and 6\% WNS improvement. Finally, we
perform airgap insertion through ILP formulation, where
a number of design rules are modeled with linear
constraints. To reduce the heavy runtime of ILP, a
layout partitioning technique is also applied. It
implements a feasible airgap mask in a manageable time
where the amount of inserted airgap is close to the
optimal solution.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Li:2019:NRM,
author = "Taozhong Li and Qin Wang and Yongxin Zhu and Jianfei
Jiang and Guanghui He and Jing Jin and Zhigang Mao and
Naifeng Jing",
title = "A Novel Resistive Memory-based Process-in-memory
Architecture for Efficient Logic and Add Operations",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "25:1--25:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3306495",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3306495",
abstract = "The coming era of big data revives the
Processing-in-memory (PIM) architecture to relieve the
memory wall problem that embarrasses the modern
computing system. However, most existing PIM designs
just put computing units closer to memory, rather than
a complete integration of them due to their
incompatibility in CMOS manufacturing. Fortunately, the
emerging Resistive-RAM (ReRAM) offers new hope to this
dilemma owing to its inherent memory and computing
capability using the same device. In this article, we
propose a ReRAM memory structure with efficient PIM
capability of both logic and add operations. It first
leverages non-linearity to suppress sneak current and
thus sustains high memory density. Using a differential
bit cell, it also enables efficient processing of
arbitrary logic functions using the same memory cells
with non-destructive operations. Then, a novel PIM
adder is proposed, which customizes a sneak current
path as the carry-chain for fast carry propagation and
improves adder performance significantly. In the
experiment, the proposed PIM demonstrates higher
efficiency in both computing area and performance for
logic and addition, which greatly increases the ReRAM
PIM applicability for future computable
architectures.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Nongpoh:2019:ESE,
author = "Bernard Nongpoh and Rajarshi Ray and Moumita Das and
Ansuman Banerjee",
title = "Enhancing Speculative Execution With Selective
Approximate Computing",
journal = j-TODAES,
volume = "24",
number = "2",
pages = "26:1--26:??",
month = mar,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3307651",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Mar 22 16:58:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3307651",
abstract = "Speculative execution is an optimization technique
used in modern processors by which predicted
instructions are executed in advance with an objective
of overlapping the latencies of slow operations. Branch
prediction and load value speculation are examples of
speculative execution used in modern pipelined
processors to avoid execution stalls. However,
speculative executions incur a performance penalty as
an execution rollback when there is a misprediction. In
this work, we propose to aid speculative execution with
approximate computing by relaxing the execution
rollback penalty associated with a misprediction. We
propose a sensitivity analysis method for data and
branches in a program to identify the data load and
branch instructions that can be executed without any
rollback in the pipeline and yet can ensure a certain
user-specified quality of service of the application
with a probabilistic reliability. Our analysis is based
on statistical methods, particularly hypothesis testing
and Bayesian analysis. We perform an architectural
simulation of our proposed approximate execution and
report the benefits in terms of CPU cycles and energy
utilization on selected applications from the AxBench,
ACCEPT, and Parsec 3.0 benchmarks suite.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Vinco:2019:CLV,
author = "Sara Vinco and Nicola Bombieri and Daniele Jahier
Pagliari and Franco Fummi and Enrico Macii and Massimo
Poncino",
title = "A Cross-level Verification Methodology for Digital
{IPs} Augmented with Embedded Timing Monitors",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "27:1--27:23",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3308565",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3308565",
abstract = "Smart systems are characterized by the integration in
a single device of multi-domain subsystems of different
technological domains, namely, analog, digital,
discrete and power devices, MEMS, and power sources.
Such challenges, emerging from the \ldots{}",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Oh:2019:TAS,
author = "Deok Keun Oh and Mu Jun Choi and Ju Ho Kim",
title = "Thermal-aware {$3$D} Symmetrical Buffered Clock Tree
Synthesis",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "28:1--28:22",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3313798",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3313798",
abstract = "The semiconductor industry has accepted
three-dimensional integrated circuits (3D ICs) as a
possible solution to address speed and power management
problems. In addition, 3D ICs have recently
demonstrated a huge potential in reducing wire length
and \ldots{}",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Schwarzer:2019:CDA,
author = "Tobias Schwarzer and Joachim Falk and Simone
M{\"u}ller and Martin Letras and Christian Heidorn and
Stefan Wildermann and J{\"u}rgen Teich",
title = "Compilation of Dataflow Applications for Multi-Cores
using Adaptive Multi-Objective Optimization",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "29:1--29:23",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3310249",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3310249",
abstract = "State-of-the-art system synthesis techniques employ
meta-heuristic optimization techniques for Design Space
Exploration (DSE) to tailor application execution,
e.g., defined by a dataflow graph, for a given target
platform. Unfortunately, the performance \ldots{}",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Tu:2019:AOS,
author = "Chia-Heng Tu and Te-Sheng Lin",
title = "Augmenting Operating Systems with {OpenCL}
Accelerators",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "30:1--30:29",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3315569",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3315569",
abstract = "Heterogeneous computing leverages more than one kind
of processors to boost the performance of user-space
applications with the heterogeneous programming
languages, e.g., OpenCL. While some works have been
done to accelerate the computations required by
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Xu:2019:ESC,
author = "Xiaolin Xu and Fahim Rahman and Bicky Shakya and
Apostol Vassilev and Domenic Forte and Mark
Tehranipoor",
title = "Electronics Supply Chain Integrity Enabled by
Blockchain",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "31:1--31:25",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3315571",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3315571",
abstract = "Electronic systems are ubiquitous today, playing an
irreplaceable role in our personal lives, as well as in
critical infrastructures such as power grids, satellite
communications, and public transportation. In the past
few decades, the security of \ldots{}",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Valencia:2019:CPA,
author = "Juan Valencia and Dip Goswami and Kees Goossens",
title = "Comparing Platform-aware Control Design Flows for
Composable and Predictable {TDM}-based Execution
Platforms",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "32:1--32:26",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3315572",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3315572",
abstract = "We compare three platform-aware feedback control
design flows that are tailored for a composable and
predictable Time Division Multiplexing (TDM)-based
execution platform. The platform allows for independent
execution of multiple applications. Using the
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lu:2019:DDA,
author = "Sixing Lu and Roman Lysecky",
title = "Data-driven Anomaly Detection with Timing Features for
Embedded Systems",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "33:1--33:27",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3279949",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3279949",
abstract = "Malware is a serious threat to network-connected
embedded systems, as evidenced by the continued and
rapid growth of such devices, commonly referred to as
the Internet of Things. Their ubiquitous use in
critical applications require robust protection to
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Metwalli:2019:SAS,
author = "Sara Ayman Metwalli and Yuko Hara-Azumi",
title = "{SSA-AC}: Static Significance Analysis for Approximate
Computing",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "34:1--34:17",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3314575",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3314575",
abstract = "Recently, the quest to reduce energy consumption in
digital systems has been the subject of a number of
ongoing studies. One of the most researched focuses is
approximate computing (AC). AC is a new computing
paradigm in both hardware and software \ldots{}",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Monteiro:2019:OCF,
author = "Jucemar Monteiro and Marcelo Johann and Laleh Behjat",
title = "An Optimized Cost Flow Algorithm to Spread Cells in
Detailed Placement",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "35:1--35:16",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3317575",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3317575",
abstract = "Placement is an important and challenging step in VLSI
physical design. The placement solution can
significantly impact timing and routability. In
sub-nanometric technology nodes, several restrictions
have been imposed on the placement solutions. These
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Islam:2019:EIT,
author = "Md Nazmul Islam and Sandip Kundu",
title = "Enabling {IC} Traceability via Blockchain Pegged to
Embedded {PUF}",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "36:1--36:23",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3315669",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3315669",
abstract = "Globalization of IC supply chain has increased the
risk of counterfeit, tampered, and re-packaged chips in
the market. Counterfeit electronics poses a security
risk in safety critical applications like avionics,
SCADA systems, and defense. It also \ldots{}",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wan:2019:DRP,
author = "Bo Wan and Xi Li and Bo Zhang and Caixu Zhao and
Xianglan Chen and Chao Wang and Xuehai Zhou",
title = "{DCW}: a Reactive and Predictable Programming
Framework for {LET}-Based Distributed Real-Time
Systems",
journal = j-TODAES,
volume = "24",
number = "3",
pages = "37:1--37:35",
month = jun,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3317574",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:30 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3317574",
abstract = "Real-time systems continuously interact with the
physical environment and often have to satisfy
stringent timing constraints imposed by their
interactions. Those systems involve two main
properties: reactivity and predictability. Reactivity
allows the \ldots{}",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Basu:2019:CBA,
author = "Kanad Basu and Samah Mohamed Saeed and Christian
Pilato and Mohammed Ashraf and Mohammed Thari Nabeel
and Krishnendu Chakrabarty and Ramesh Karri",
title = "{CAD-Base}: an Attack Vector into the Electronics
Supply Chain",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "38:1--38:30",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3315574",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3315574",
abstract = "Fabless semiconductor companies design system-on-chips
(SoC) by using third-party intellectual property (IP)
cores and fabricate them in offshore, potentially
untrustworthy foundries. Owing to the globally
distributed electronics supply chain, security
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Rokni:2019:SLF,
author = "Seyed Ali Rokni and Hassan Ghasemzadeh",
title = "{Share-n-Learn}: a Framework for Sharing Activity
Recognition Models in Wearable Systems With
Context-Varying Sensors",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "39:1--39:27",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3318044",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3318044",
abstract = "Wearable sensors utilize machine learning algorithms
to infer important events such as the behavioral
routine and health status of their end users from
time-series sensor data. A major obstacle in
large-scale utilization of these systems is that the
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zimmermann:2019:ADL,
author = "Thomas Zimmermann and Mathias Mora and Sebastian
Steinhorst and Daniel Mueller-Gritschneder and Andreas
Jossen",
title = "Analysis of Dissipative Losses in Modular
Reconfigurable Energy Storage Systems Using {SystemC
TLM} and {SystemC-AMS}",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "40:1--40:33",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3321387",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3321387",
abstract = "Battery storage systems are becoming more popular in
the automotive industry as well as in stationary
applications. To fulfill the requirements in terms of
power and energy, the literature is increasingly
discussing electrically reconfigurable \ldots{}",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sayed:2019:CAP,
author = "Nour Sayed and Longfei Mao and Rajendra Bishnoi and
Mehdi B. Tahoori",
title = "Compiler-Assisted and Profiling-Based Analysis for
Fast and Efficient {STT-MRAM} On-Chip Cache Design",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "41:1--41:25",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3321693",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3321693",
abstract = "Spin Transfer Torque Magnetic Random Access Memory
(STT-MRAM) is a promising candidate for large on-chip
memories as a zero-leakage, high-density and
non-volatile alternative to the present SRAM
technology. Since memories are the dominating component
of \ldots{}",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2019:LRA,
author = "Naixing Wang and Irith Pomeranz and Sudhakar M. Reddy
and Arani Sinha and Srikanth Venkataraman",
title = "Layout Resynthesis by Applying
Design-for-manufacturability Guidelines to Avoid
Low-coverage Areas of a Cell-based Design",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "42:1--42:19",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3325066",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3325066",
abstract = "Design-for-manufacturability (DFM) guidelines are
recommended layout design practices intended to capture
layout features that are difficult to manufacture
correctly. Avoiding such features prevents the
occurrence of potential systematic defects. Layout
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Burcea:2019:MIR,
author = "Florin Burcea and Andreas Herrmann and Bing Li and
Helmut Graeb",
title = "{MEMS-IC} Robustness Optimization Considering
Electrical and Mechanical Design and Process
Parameters",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "43:1--43:24",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3325068",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3325068",
abstract = "MEMS-based sensor circuits are traditionally designed
separately using CAD tools specific to each energy
domain (electrical and mechanical). This article
presents a complete approach for combined MEMS-IC
robustness optimization. Advanced methods for
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Afacan:2019:CRC,
author = "Engin Afacan and G{\"u}nhan D{\"u}ndar and
Faik Baskaya and Ali Emre Pusane and Mustafa
Berke Yelten",
title = "On Chip Reconfigurable {CMOS} Analog Circuit Design
and Automation Against Aging Phenomena: Sense and
React",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "44:1--44:22",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3325069",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3325069",
abstract = "Performance of analog circuits degrades over time due
to several time-dependent degradation mechanisms. Due
to the increased aging problems in ever-shrinking
dimensions, reliability of complementary
metal-oxide-semiconductor analog circuits has become a
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2019:ATR,
author = "Yanjun Li and Ender Yilmaz and Pete Sarson and Sule
Ozev",
title = "Adaptive Test for {RF}\slash Analog Circuit Using
Higher Order Correlations among Measurements",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "45:1--45:16",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3308566",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3308566",
abstract = "As process variations increase and devices get more
diverse in their behavior, using the same test list for
all devices is increasingly inefficient. Methodologies
that adapt the test sequence with respect to lot,
wafer, or even a device's own behavior \ldots{}",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2019:CPR,
author = "Chengning Wang and Dan Feng and Wei Tong and Jingning
Liu and Zheng Li and Jiayi Chang and Yang Zhang and
Bing Wu and Jie Xu and Wei Zhao and Yilin Li and Ruoxi
Ren",
title = "Cross-point Resistive Memory: Nonideal Properties and
Solutions",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "46:1--46:37",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3325067",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3325067",
abstract = "Emerging computational resistive memory is promising
to overcome the challenges of scalability and energy
efficiency that DRAM faces and also break through the
memory wall bottleneck. However, cell-level and
array-level nonideal properties of resistive \ldots{}",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Jun:2019:FTT,
author = "Jaeyung Jun and Yoonah Paik and Gyeong Il Min and Seon
Wook Kim and Youngsun Han",
title = "Fault Tolerance Technique Offlining Faulty Blocks by
Heap Memory Management",
journal = j-TODAES,
volume = "24",
number = "4",
pages = "47:1--47:25",
month = jul,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3329079",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:31 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3329079",
abstract = "As dynamic random access memory (DRAM) cells continue
to be scaled down for higher density and capacity, they
have more faults. Thus, DRAM reliability becomes a
major concern in computer systems. Previous studies
have proposed many techniques preserving \ldots{}",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Vegesna:2019:NRM,
author = "S. M. Srinivasavarma Vegesna and Ashok Chakravarthy
Nara and Noor Mahammad Sk",
title = "A Novel Rule Mapping on {TCAM} for Power Efficient
Packet Classification",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "48:1--48:23",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3328103",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3328103",
abstract = "Packet Classification is the enabling function
performed in commodity switches for providing various
services such as access control, intrusion detection,
load balancing, and so on. Ternary Content Addressable
Memories (TCAMs) are the de facto standard \ldots{}",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2019:ITD,
author = "Hongfei Wang and Kun He",
title = "Improving Test and Diagnosis Efficiency through
Ensemble Reduction and Learning",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "49:1--49:26",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3328754",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3328754",
abstract = "Machine learning is a powerful lever for developing,
improving, and optimizing test methodologies to cope
with the demand from the advanced nodes. Ensemble
methods are a particular learning paradigm that uses
multiple models to boost performance. In \ldots{}",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Cakir:2019:RCH,
author = "Burcin Cakir and Sharad Malik",
title = "Revealing Cluster Hierarchy in Gate-level {ICs} Using
Block Diagrams and Cluster Estimates of Circuit
Embeddings",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "50:1--50:19",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3329081",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3329081",
abstract = "Contemporary integrated circuits (ICs) are
increasingly being constructed using intellectual
property blocks (IPs) obtained from third parties in a
globalized supply chain. The increased vulnerability to
adversarial changes during this untrusted supply
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2019:SIP,
author = "Tengtao Li and Sachin S. Sapatnekar",
title = "Stress-Induced Performance Shifts in {$3$D} {DRAMs}",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "51:1--51:21",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3331527",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3331527",
abstract = "3D-stacked DRAMs can significantly increase cell
density and bandwidth while also lowering power
consumption. However, 3D structures experience
significant thermomechanical stress due to the
differential rate of contraction of the constituent
materials, \ldots{}",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chakraborty:2019:ERL,
author = "Shounak Chakraborty and Hemangee K. Kapoor",
title = "Exploring the Role of Large Centralised Caches in
Thermal Efficient Chip Design",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "52:1--52:28",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3339850",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3339850",
abstract = "In the era of short channel length, Dynamic Thermal
Management (DTM) has become a challenging task for the
architects and designers engineering modern Chip
Multi-Processors (CMPs). Ever-increasing demand of
processing power along with the developed \ldots{}",
acknowledgement = ack-nhfb,
articleno = "52",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Choi:2019:RDR,
author = "Kyu Hyun Choi and Jaeyung Jun and Minseong Kim and
Seon Wook Kim",
title = "Reducing {DRAM} Refresh Rate Using Retention Time
Aware Universal Hashing Redundancy Repair",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "53:1--53:31",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3339851",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3339851",
abstract = "As the device capacity of Dynamic Random Access Memory
(DRAM) increases, refresh operation becomes a
significant contributory factor toward total power
consumption and memory throughput of the device. To
reduce the problems associated with the refresh
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "53",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2019:TMF,
author = "Xiangwei Li and Douglas L. Maskell",
title = "Time-Multiplexed {FPGA} Overlay Architectures: a
Survey",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "54:1--54:19",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3339861",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3339861",
abstract = "This article presents a comprehensive survey of
time-multiplexed (TM) FPGA overlays from the research
literature. These overlays are categorized based on
their implementation into two groups: processor-based
overlays, as their implementation follows \ldots{}",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Gade:2019:EEC,
author = "Sri Harsha Gade and M. Meraj Ahmed and Sujay Deb and
Amlan Ganguly",
title = "Energy Efficient Chip-to-Chip Wireless Interconnection
for Heterogeneous Architectures",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "55:1--55:27",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3340109",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3340109",
abstract = "Heterogeneous multichip architectures have gained
significant interest in high-performance computing
clusters to cater to a wide range of applications. In
particular, heterogeneous systems with multiple
multicore CPUs, GPUs, and memory have become \ldots{}",
acknowledgement = ack-nhfb,
articleno = "55",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Osawa:2019:ADR,
author = "Hisashi Osawa and Yuko Hara-Azumi",
title = "Approximate Data Reuse-based Accelerator Design for
Embedded Processor",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "56:1--56:25",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3342098",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3342098",
abstract = "Due to increasing diversity and complexity of
applications in embedded systems, accelerator designs
trading-off area/energy-efficiency and
design-productivity are becoming a further crucial
issue. Targeting applications in the category of
Recognition, \ldots{}",
acknowledgement = ack-nhfb,
articleno = "56",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Raval:2019:III,
author = "Rajkumar K. Raval and Atta Badii",
title = "Investigating the Impact of Image Content on the
Energy Efficiency of Hardware-accelerated Digital
Spatial Filters",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "57:1--57:34",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3341819",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3341819",
abstract = "Battery-operated low-power portable computing devices
are becoming an inseparable part of human daily life.
One of the major goals is to achieve the longest
battery life in such a device. Additionally, the need
for performance in processing multimedia \ldots{}",
acknowledgement = ack-nhfb,
articleno = "57",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Bonna:2019:MSD,
author = "Ricardo Bonna and Denis S. Loubach and George
Ungureanu and Ingo Sander",
title = "Modeling and Simulation of Dynamic Applications Using
Scenario-Aware Dataflow",
journal = j-TODAES,
volume = "24",
number = "5",
pages = "58:1--58:29",
month = oct,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3342997",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3342997",
abstract = "The tradeoff between analyzability and expressiveness
is a key factor when choosing a suitable dataflow model
of computation (MoC) for designing, modeling, and
simulating applications considering a formal base. A
large number of techniques and analysis \ldots{}",
acknowledgement = ack-nhfb,
articleno = "58",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Jiang:2019:EEQ,
author = "Li Jiang and Zhuoran Song and Haiyue Song and Chengwen
Xu and Qiang Xu and Naifeng Jing and Weifeng Zhang and
Xiaoyao Liang",
title = "Energy-Efficient and Quality-Assured Approximate
Computing Framework Using a Co-Training Method",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "59:1--59:25",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3342239",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3342239",
abstract = "Approximate computing is a promising design paradigm
that introduces a new dimension---error---into the original
design space. By allowing the inexact computation in
error-tolerance applications, approximate computing can
gain both performance and energy \ldots{}",
acknowledgement = ack-nhfb,
articleno = "59",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Charles:2019:ECR,
author = "Subodha Charles and Alif Ahmed and Umit Y. Ogras and
Prabhat Mishra",
title = "Efficient Cache Reconfiguration Using Machine Learning
in {NoC}-Based Many-Core {CMPs}",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "60:1--60:23",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3350422",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3350422",
abstract = "Dynamic cache reconfiguration (DCR) is an effective
technique to optimize energy consumption in many-core
architectures. While early work on DCR has shown
promising energy saving opportunities, prior techniques
are not suitable for many-core \ldots{}",
acknowledgement = ack-nhfb,
articleno = "60",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Song:2019:COR,
author = "Youngsoo Song and Daijoon Hyun and Jingon Lee and
Jinwook Jung and Youngsoo Shin",
title = "Cut Optimization for Redundant Via Insertion in
Self-Aligned Double Patterning",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "61:1--61:21",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3355391",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3355391",
abstract = "Redundant via (RV) insertion helps prevent via defects
and hence leads to yield enhancement. However, RV
insertion in self-aligned double patterning (SADP)
processes is challenging since cut optimization has to
be considered together. In SADP, parallel \ldots{}",
acknowledgement = ack-nhfb,
articleno = "61",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lee:2019:IEC,
author = "Dongjin Lee and Sourav Das and Janardhan Rao Doppa and
Partha Pratim Pande and Krishnendu Chakrabarty",
title = "Impact of Electrostatic Coupling on Monolithic
{$3$D}-enabled Network on Chip",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "62:1--62:22",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3357158",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3357158",
abstract = "Monolithic-3D-integration (M3D) improves the
performance and energy efficiency of 3D ICs over
conventional through-silicon-vias-based counterparts.
The smaller dimensions of monolithic inter-tier vias
offer high-density integration, the flexibility of
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "62",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Kukkala:2019:JSF,
author = "Vipin Kumar Kukkala and Sudeep Pasricha and Thomas
Bradley",
title = "{JAMS-SG}: a Framework for Jitter-Aware Message
Scheduling for Time-Triggered Automotive Networks",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "63:1--63:31",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3355392",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3355392",
abstract = "Time-triggered automotive networks use time-triggered
protocols (FlexRay, TTEthernet, etc.) for periodic
message transmissions that often originate from safety
and time-critical applications. One of the major
challenges with time-triggered transmissions \ldots{}",
acknowledgement = ack-nhfb,
articleno = "63",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Asgarieh:2019:SHA,
author = "Yashar Asgarieh and Bill Lin",
title = "Smart-Hop Arbitration Request Propagation: Avoiding
Quadratic Arbitration Complexity and False Negatives in
{SMART NoCs}",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "64:1--64:25",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3356235",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3356235",
abstract = "SMART-based NoC designs achieve ultra-low latencies by
enabling flits to traverse multiple hops within a
single clock cycle. Notwithstanding the clear
performance benefits, SMART-based NoCs suffer from
several shortcomings: each router must arbitrate
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "64",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Shamsi:2019:IPS,
author = "Kaveh Shamsi and Meng Li and Kenneth Plaks and Saverio
Fazzari and David Z. Pan and Yier Jin",
title = "{IP} Protection and Supply Chain Security through
Logic Obfuscation: a Systematic Overview",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "65:1--65:36",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3342099",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3342099",
abstract = "The globalization of the semiconductor supply chain
introduces ever-increasing security and privacy risks.
Two major concerns are IP theft through reverse
engineering and malicious modification of the design.
The latter concern in part relies on \ldots{}",
acknowledgement = ack-nhfb,
articleno = "65",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2019:RTS,
author = "Kankan Wang and Xu Jiang and Nan Guan and Di Liu and
Weichen Liu and Qingxu Deng",
title = "Real-Time Scheduling of {DAG} Tasks with Arbitrary
Deadlines",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "66:1--66:22",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3358603",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3358603",
abstract = "Real-time and embedded systems are shifting from
single-core to multi-core processors, on which the
software must be parallelized to fully utilize the
computation capacity of the hardware. Recently, much
work has been done on real-time scheduling of
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "66",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2019:OTL,
author = "Yung-Chih Chen and Li-Cheng Zheng and Fu-Lian Wong",
title = "Optimization of Threshold Logic Networks with Node
Merging and Wire Replacement",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "67:1--67:18",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3358748",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3358748",
abstract = "In this article, we present an optimization method for
threshold logic networks (TLNs) based on observability
don't-care-based node merging. To reduce gate count in
a TLN, it iteratively merges two gates that are
functionally equivalent or whose \ldots{}",
acknowledgement = ack-nhfb,
articleno = "67",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Yan:2019:TSN,
author = "Jin-Tai Yan",
title = "Two-sided Net Untangling with Internal Detours for
Single-layer Bus Routing",
journal = j-TODAES,
volume = "24",
number = "6",
pages = "68:1--68:23",
month = nov,
year = "2019",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3363184",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:32 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3363184",
abstract = "It is known that one-sided net untangling can be used
to untangle the twisted nets inside a bus for
single-layer bus routing. However, limited space behind
one pin-row may make one-sided net untangling
unsuccessful for single-layer bus routing. In this
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "68",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2019:RSE,
  author = {Hai Wang and Tao Xiao and Darong Huang and Lang Zhang
    and Chi Zhang and He Tang and Yuan Yuan},
  title = {Runtime Stress Estimation for Three-dimensional {IC}
    Reliability Management Using Artificial Neural
    Network},
  journal = j-TODAES,
  volume = {24},
  number = {6},
  pages = {69:1--69:29},
  month = nov,
  year = {2019},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3363185},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:32 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3363185},
  abstract = {Heat dissipation and the related thermal-mechanical
    stress problems are the major obstacles in the
    development of the three-dimensional integrated circuit
    (3D IC). Reliability management techniques can be used
    to alleviate such problems and enhance the \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {69},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Mahfouzi:2020:SAR,
  author = {Rouhollah Mahfouzi and Amir Aminifar and Soheil Samii
    and Petru Eles and Zebo Peng},
  title = {Security-aware Routing and Scheduling for Control
    Applications on {Ethernet TSN} Networks},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {1:1--1:26},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3358604},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3358604},
  abstract = {Today, it is common knowledge in the cyber-physical
    systems domain that the tight interaction between the
    cyber and physical elements provides the possibility of
    substantially improving the performance of these
    systems that is otherwise impossible. On \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {1},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Shi:2020:ASF,
  author = {Guoyong Shi},
  title = {Automatic Stage-form Circuit Reduction for Multistage
    Opamp Design Equation Generation},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {2:1--2:26},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3363499},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3363499},
  abstract = {An automatic stage-form circuit reduction method for
    multistage operational amplifiers (opamps) is proposed.
    A tool based on this method can reduce a multistage
    opamp into a condensed stage-form macromodel, from
    which design equations can be generated \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {2},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Wang:2020:IBT,
author = "Chih-Hao Wang and Tong-Yu Hsieh",
title = "An Implication-based Test Scheme for Both Diagnosis
and Concurrent Error Detection Applications",
journal = j-TODAES,
volume = "25",
number = "1",
pages = "3:1--3:27",
month = jan,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3364681",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:33 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3364681",
abstract = "This article describes a diagnosis-aware hybrid
concurrent error detection (DAH-CED) scheme that can
facilitate both off-line and on-line test applications.
By using the proposed scheme, not only the probability
of detecting errors (on-line) but also \ldots{}",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Hoque:2020:HPO,
  author = {Tamzidul Hoque and Kai Yang and Robert Karam and
    Shahin Tajik and Domenic Forte and Mark Tehranipoor and
    Swarup Bhunia},
  title = {Hidden in Plaintext: an Obfuscation-based
    Countermeasure against {FPGA} Bitstream Tampering
    Attacks},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {4:1--4:32},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3361147},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3361147},
  abstract = {Field Programmable Gate Arrays (FPGAs) have become an
    attractive choice for diverse applications due to their
    reconfigurability and unique security features.
    However, designs mapped to FPGAs are prone to malicious
    modifications or tampering of critical \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {4},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Bhattacharjee:2020:BCA,
  author = {Sukanta Bhattacharjee and Jack Tang and Sudip Poddar
    and Mohamed Ibrahim and Ramesh Karri and Krishnendu
    Chakrabarty},
  title = {Bio-chemical Assay Locking to Thwart Bio-{IP} Theft},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {5:1--5:20},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3365579},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365579},
  abstract = {It is expected that as digital microfluidic biochips
    (DMFBs) mature, the hardware design flow will begin to
    resemble the current practice in the semiconductor
    industry: design teams send chip layouts to third-party
    foundries for fabrication. These \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {5},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Malekpour:2020:HTM,
author = "Amin Malekpour and Roshan Ragel and Tuo Li and Haris
Javaid and Aleksandar Ignjatovic and Sri Parameswaran",
title = "Hardware {Trojan} Mitigation in Pipelined {MPSoCs}",
journal = j-TODAES,
volume = "25",
number = "1",
pages = "6:1--6:27",
month = jan,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3365578",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:33 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3365578",
abstract = "Multiprocessor System-on-Chip (MPSoC) has become
necessary due to the billions of transistors
available to the designer, the need for fast design
turnaround times, and the power wall. Thus, present
embedded systems are designed with MPSoCs, and one
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pan:2020:ARP,
  author = {Renjian Pan and Jun Tao and Yangfeng Su and Dian Zhou
    and Xuan Zeng and Xin Li},
  title = {Analog\slash {RF} Post-silicon Tuning via {Bayesian}
    Optimization},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {7:1--7:17},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3365577},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365577},
  abstract = {Tunable analog/RF circuit has emerged as a promising
    technique to address the significant performance
    uncertainties caused by process variations. To optimize
    these tunable circuits after fabrication, most existing
    post-silicon programming methods are \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {7},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Xu:2020:MCM,
  author = {Qi Xu and Hao Geng and Song Chen and Bei Yu and Feng
    Wu},
  title = {Memristive Crossbar Mapping for Neuromorphic Computing
    Systems on {$3$D} {IC}},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {8:1--8:19},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3365576},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365576},
  abstract = {In recent years, neuromorphic computing systems based
    on memristive crossbar have provided a promising
    solution to enable acceleration of neural networks.
    However, most of the neural networks used in realistic
    applications are often sparse. If such \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {8},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Parane:2020:LDL,
  author = {Khyamling Parane and Prabhu Prasad B. M. and Basavaraj
    Talawar},
  title = {{LBNoC}: Design of Low-latency Router Architecture
    with Lookahead Bypass for Network-on-Chip Using
    {FPGA}},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {9:1--9:26},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3365994},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365994},
  abstract = {An FPGA-based Network-on-Chip (NoC) using a
    low-latency router with a look-ahead bypass (LBNoC) is
    discussed in this article. The proposed design targets
    the optimized area with improved network performance.
    The techniques such as single-cycle router \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {9},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Roy:2020:HGM,
  author = {Pushpita Roy and Ansuman Banerjee and Robert Wille and
    Bhargab B. Bhattacharya},
  title = {Harnessing the Granularity of
    Micro-Electrode-Dot-Array Architectures for Optimizing
    Droplet Routing in Biochips},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {10:1--10:37},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3365993},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365993},
  abstract = {In this article, we consider the problem of droplet
    routing for Microelectrode-Dot-Array (MEDA) biochips.
    MEDA biochips today provide a host of useful features
    for droplet movement by making it possible to manoeuvre
    droplets at a much finer granularity \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {10},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Esmaili:2020:EAS,
  author = {Amirhossein Esmaili and Mahdi Nazemi and Massoud
    Pedram},
  title = {Energy-aware Scheduling of Task Graphs with Imprecise
    Computations and End-to-end Deadlines},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {11:1--11:21},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3365999},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365999},
  abstract = {Imprecise computations allow scheduling algorithms
    developed for energy-constrained computing devices to
    trade off output quality with utilization of system
    resources. The goal of such scheduling algorithms is to
    utilize imprecise computations to find a \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {11},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Wang:2020:HER,
  author = {Hongfei Wang and Jianwen Li and Kun He},
  title = {Hierarchical Ensemble Reduction and Learning for
    Resource-constrained Computing},
  journal = j-TODAES,
  volume = {25},
  number = {1},
  pages = {12:1--12:21},
  month = jan,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3365224},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Thu Jan 30 09:00:33 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3365224},
  abstract = {Generic tree ensembles (such as Random Forest, RF)
    rely on a substantial amount of individual models to
    attain desirable performance. The cost of maintaining a
    large ensemble could become prohibitive in applications
    where computing resources are \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {12},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Tseng:2020:MAU,
author = "Tien-Hung Tseng and Chung-Han Chou and Kai-Chiang Wu",
title = "Making Aging Useful by Recycling Aging-induced Clock
Skew",
journal = j-TODAES,
volume = "25",
number = "2",
pages = "13:1--13:24",
month = mar,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3363186",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:34 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3363186",
abstract = "Device aging, which causes significant loss on circuit
performance and lifetime, has been a primary factor in
reliability degradation of nanoscale designs. In this
article, we propose to take advantage of aging-induced
clock skews (i.e., make them \ldots{}).",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Richthammer:2020:SSD,
author = "Valentina Richthammer and Fabian Fassnacht and Michael
Gla{\ss}",
title = "Search-space Decomposition for System-level Design
Space Exploration of Embedded Systems",
journal = j-TODAES,
volume = "25",
number = "2",
pages = "14:1--14:32",
month = mar,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3369388",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:34 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369388",
abstract = "The development of large-scale multi- and many-core
platforms and the rising complexity of embedded
applications have led to a significant increase in the
number of implementation possibilities for a single
application. Furthermore, rising demands on \ldots{}",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{He:2020:LHD,
author = "Xu He and Yu Deng and Shizhe Zhou and Rui Li and Yao
Wang and Yang Guo",
title = "Lithography Hotspot Detection with {FFT}-based Feature
Extraction and Imbalanced Learning Rate",
journal = j-TODAES,
volume = "25",
number = "2",
pages = "15:1--15:21",
month = mar,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3372044",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:34 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372044",
abstract = "With the increasing gap between transistor feature
size and lithography manufacturing capability, the
detection of lithography hotspots becomes a key stage
of physical verification flow to enhance manufacturing
yield. Although machine learning \ldots{}",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Tadros:2020:TFT,
author = "Ramy N. Tadros and Peter A. Beerel",
title = "A Theoretical Foundation for Timing Synchronous
Systems Using Asynchronous Structures",
journal = j-TODAES,
volume = "25",
number = "2",
pages = "16:1--16:28",
month = mar,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3373355",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Mar 18 07:50:32 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3373355",
abstract = "Timing of synchronous systems is an everlasting
stumbling block to the booming demands for lower power
consumption and higher operation speeds in the
electronics industry. This hardship is aggravated by
the growing levels of variability in state-of-the-art
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Liang:2020:SAE,
author = "Tung-Che Liang and Mohammed Shayan and Krishnendu
Chakrabarty and Ramesh Karri",
title = "Secure Assay Execution on {MEDA} Biochips to Thwart
Attacks Using Real-Time Sensing",
journal = j-TODAES,
volume = "25",
number = "2",
pages = "17:1--17:25",
month = mar,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3374213",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:34 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3374213",
abstract = "Digital microfluidic biochips (DMFBs) have emerged as
a promising platform for DNA sequencing, clinical
chemistry, and point-of-care diagnostics. Recent
research has shown that DMFBs are susceptible to
various types of malicious attacks. Defenses \ldots{}",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pomeranz:2020:TFT,
author = "Irith Pomeranz",
title = "Target Faults for Test Compaction Based on Multicycle
Tests",
journal = j-TODAES,
volume = "25",
number = "2",
pages = "18:1--18:14",
month = mar,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3375278",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Jan 30 09:00:34 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3375278",
abstract = "The use of multicycle tests, with several functional
capture cycles between scan operations, contributes
significantly to the ability to compact a test set.
Multicycle tests have the added benefit that they can
contribute to the detection of defects \ldots{}",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Olney:2020:TFB,
  author = {Brooks Olney and Robert Karam},
  title = {Tunable {FPGA} Bitstream Obfuscation with {Boolean}
    Satisfiability Attack Countermeasure},
  journal = j-TODAES,
  volume = {25},
  number = {2},
  pages = {19:1--19:22},
  month = mar,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3373638},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Mar 18 07:50:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3373638},
  abstract = {Field Programmable Gate Arrays (FPGAs) are seeing a
    surge in usage in many emerging application domains,
    where the in-field reconfigurability is an attractive
    characteristic for diverse applications with dynamic
    design requirements, such as cloud \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {19},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Yang:2020:HSS,
  author = {Yajun Yang and Zhang Chen and Yuan Liu and Tsung-Yi Ho
    and Yier Jin and Pingqiang Zhou},
  title = {How Secure Is Split Manufacturing in Preventing
    Hardware {Trojan}?},
  journal = j-TODAES,
  volume = {25},
  number = {2},
  pages = {20:1--20:23},
  month = mar,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3378163},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Mar 18 07:50:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3378163},
  abstract = {With the trend of outsourcing fabrication, split
    manufacturing is regarded as a promising way to both
    acquire the high-end nodes in untrusted external
    foundries and protect the design from potential
    attackers. However, in this article, we show that split
    manufacturing is not inherently secure, that a hardware
    Trojan attacker can still recover necessary information
    with a proximity-based or a simulated-annealing-based
    mapping approach together with a probability-based or
    net-based pruning method at the placement level. We
    further propose a defense approach by moving the
    insecure gates away from their easily attacked
    candidate locations. Results on benchmark circuits show
    the effectiveness of our proposed methods.},
  acknowledgement = ack-nhfb,
  articleno = {20},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Pui:2020:LRB,
author = "Chak-Wa Pui and Evangeline F. Y. Young",
title = "{Lagrangian} Relaxation-Based Time-Division
Multiplexing Optimization for Multi-{FPGA} Systems",
journal = j-TODAES,
volume = "25",
number = "2",
pages = "21:1--21:23",
month = mar,
year = "2020",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3377551",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Mar 18 07:50:32 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3377551",
abstract = "To increase the resource utilization
in multi-FPGA (field-programmable gate array) systems,
time-division multiplexing (TDM) is a widely used
technique to accommodate a large number of inter-FPGA
signals. However, with this technique, the delay
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Yan:2020:SLO,
  author = {Jin-Tai Yan},
  title = {Single-Layer Obstacle-Aware Substrate Routing via
    Iterative Pin Reassignment and Wire Assignment},
  journal = j-TODAES,
  volume = {25},
  number = {2},
  pages = {22:1--22:21},
  month = mar,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3378162},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Mar 18 07:50:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3378162},
  abstract = {It is known that single-layer obstacle-aware substrate
    routing is necessary for modern IC/Package designs. In
    this article, given a set of two-pin nets and a set of
    rectangular obstacles inside a single-layer routing
    plane, a two-phase routing algorithm \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {22},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Sha:2020:FPT,
  author = {Shi Sha and Ajinkya S. Bankar and Xiaokun Yang and
    Wujie Wen and Gang Quan},
  title = {On Fundamental Principles for Thermal-Aware Design on
    Periodic Real-Time Multi-Core Systems},
  journal = j-TODAES,
  volume = {25},
  number = {2},
  pages = {23:1--23:23},
  month = mar,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3378063},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Wed Mar 18 07:50:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3378063},
  abstract = {With the exponential rise of the transistor count in
    one chip, the thermal problem has become a pressing
    issue in computing system design. While there have been
    extensive methods and techniques published for design
    optimization with thermal awareness, \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {23},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Nath:2020:RDB,
  author = {Arijit Nath and Sukarn Agarwal and Hemangee K.
    Kapoor},
  title = {Reuse Distance-based Victim Cache for Effective
    Utilisation of Hybrid Main Memory System},
  journal = j-TODAES,
  volume = {25},
  number = {3},
  pages = {24:1--24:32},
  month = may,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3380732},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue May 19 10:15:25 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3380732},
  abstract = {Hybrid main memories comprising DRAM and Non-volatile
    memories (NVM) are projected as potential replacements
    of the traditional DRAM-based memories. However,
    traditional cache management policies designed for
    improving the hit rate lack awareness of the \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {24},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Kamal:2020:ADF,
  author = {Nishant Kamal and Ankur Gupta and Ananya Singla and
    Shubham Tiwari and Parth Kohli and Sudip Roy and
    Bhargab B. Bhattacharya},
  title = {Architectural Design of Flow-Based Microfluidic
    Biochips for Multi-Target Dilution of Biochemical
    Fluids},
  journal = j-TODAES,
  volume = {25},
  number = {3},
  pages = {25:1--25:34},
  month = may,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3357604},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue May 19 10:15:25 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3357604},
  abstract = {Microfluidic technologies enable replacement of
    time-consuming and complex steps of biochemical
    laboratory protocols with a tiny chip. Sample
    preparation (i.e., dilution or mixing of fluids) is one
    of the primary tasks of any bioprotocol. In real-life
    \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {25},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Nahiyan:2020:SCF,
  author = {Adib Nahiyan and Jungmin Park and Miao He and Yousef
    Iskander and Farimah Farahmandi and Domenic Forte and
    Mark Tehranipoor},
  title = {{SCRIPT}: a {CAD} Framework for Power Side-channel
    Vulnerability Assessment Using Information Flow
    Tracking and Pattern Generation},
  journal = j-TODAES,
  volume = {25},
  number = {3},
  pages = {26:1--26:27},
  month = may,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3383445},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue May 19 10:15:25 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3383445},
  abstract = {Power side-channel attacks (SCAs) have been proven to
    be effective at extracting secret keys from hardware
    implementations of cryptographic algorithms. Ideally,
    the power side-channel leakage (PSCL) of hardware
    designs of a cryptographic algorithm \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {26},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Chen:2020:SMB,
  author = {Huili Chen and Seetal Potluri and Farinaz Koushanfar},
  title = {Security of Microfluidic Biochip: Practical Attacks
    and Countermeasures},
  journal = j-TODAES,
  volume = {25},
  number = {3},
  pages = {27:1--27:29},
  month = may,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3382127},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue May 19 10:15:25 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3382127},
  abstract = {With the advancement of system miniaturization and
    automation, Lab-on-a-Chip (LoC) technology has
    revolutionized traditional experimental procedures.
    Microfluidic Biochip (MFB) is an emerging branch of LoC
    with wide medical applications such as DNA \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {27},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Mandal:2020:EAO,
  author = {Sumit K. Mandal and Ganapati Bhat and Janardhan Rao
    Doppa and Partha Pratim Pande and Umit Y. Ogras},
  title = {An Energy-aware Online Learning Framework for Resource
    Management in Heterogeneous Platforms},
  journal = j-TODAES,
  volume = {25},
  number = {3},
  pages = {28:1--28:26},
  month = may,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3386359},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue May 19 10:15:25 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3386359},
  abstract = {Mobile platforms must satisfy the contradictory
    requirements of fast response time and minimum energy
    consumption as a function of dynamically changing
    applications. To address this need, systems-on-chip
    (SoC) that are at the heart of these devices \ldots{}},
  acknowledgement = ack-nhfb,
  articleno = {28},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Liu:2020:AFD,
  author = {Mengyun Liu and Lixue Xia and Yu Wang and Krishnendu
    Chakrabarty},
  title = {Algorithmic Fault Detection for {RRAM}-based Matrix
    Operations},
  journal = j-TODAES,
  volume = {25},
  number = {3},
  pages = {29:1--29:31},
  month = may,
  year = {2020},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3386360},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue May 19 10:15:25 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3386360},
  abstract = {An RRAM-based computing system (RCS) provides an
    energy-efficient hardware implementation of
    vector--matrix multiplication for machine-learning
    hardware. However, it is vulnerable to faults due to
    the immature RRAM fabrication process. We propose an
    efficient fault tolerance method for RCS; the proposed
    method, referred to as extended-ABFT (X-ABFT), is
    inspired by algorithm-based fault tolerance (ABFT). We
    utilize row checksums and test-input vectors to extract
    signatures for fault detection and error correction. We
    present a solution to alleviate the overflow problem
    caused by the limited number of voltage levels for the
    test-input signals. Simulation results show that for a
    Hopfield classifier with faults in 5\% of its RRAM
    cells, X-ABFT allows us to achieve nearly the same
    classification accuracy as in the fault-free case.},
  acknowledgement = ack-nhfb,
  articleno = {29},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Paik:2020:GRT,
  author =       "Yoonah Paik and Seon Wook Kim and Dongha Jung and
                 Minseong Kim",
  title =        "Generating Representative Test Sequences from Real
                 Workload for Minimizing {DRAM} Verification Overhead",
  journal =      j-TODAES,
  volume =       "25",
  number =       "4",
  pages =        "30:1--30:23",
  month =        sep,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3391891",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 3 14:31:04 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391891",
  abstract =     "Dynamic Random Access Memory (DRAM) standards have
                 evolved for higher bandwidth, larger capacity, and
                 lower power consumption, so their specifications have
                 become complicated to satisfy the design goals. These
                 complex implementations have significantly \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Jana:2020:HHC,
  author =       "Rajib Lochan Jana and Soumyajit Dey and Pallab
                 Dasgupta",
  title =        "A Hierarchical {HVAC} Control Scheme for Energy-aware
                 Smart Building Automation",
  journal =      j-TODAES,
  volume =       "25",
  number =       "4",
  pages =        "31:1--31:33",
  month =        sep,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3393666",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 3 14:31:04 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3393666",
  abstract =     "Heating ventilation and air conditioning (HVAC)
                 systems usually account for the highest percentage of
                 overall energy usage in large-sized smart building
                 infrastructures. The performance of HVAC control
                 systems for large buildings strongly depend on the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Chatterjee:2020:MLA,
  author =       "Urbi Chatterjee and Soumi Chatterjee and Debdeep
                 Mukhopadhyay and Rajat Subhra Chakraborty",
  title =        "Machine Learning Assisted {PUF} Calibration for
                 Trustworthy Proof of Sensor Data in {IoT}",
  journal =      j-TODAES,
  volume =       "25",
  number =       "4",
  pages =        "32:1--32:21",
  month =        sep,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3393628",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 3 14:31:04 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3393628",
  abstract =     "Remote integrity verification plays a paramount role
                 in resource-constraint devices owing to emerging
                 applications such as Internet-of-Things (IoT), smart
                 homes, e-health, and so on. The concept of Virtual
                 Proof of Reality (VPoR) proposed by R{\"u}hrmair et
                 al.\ \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Vijayan:2020:RIH,
  author =       "Arunkumar Vijayan and Mehdi B. Tahoori and Krishnendu
                 Chakrabarty",
  title =        "Runtime Identification of Hardware {Trojans} by
                 Feature Analysis on Gate-Level Unstructured Data and
                 Anomaly Detection",
  journal =      j-TODAES,
  volume =       "25",
  number =       "4",
  pages =        "33:1--33:23",
  month =        sep,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3391890",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 3 14:31:04 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391890",
  abstract =     "As the globalization of chip design and manufacturing
                 process becomes popular, malicious hardware inclusions
                 such as hardware Trojans pose a serious threat to the
                 security of digital systems. Advanced Trojans can mask
                 many architectural-level Trojan \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Alasad:2020:SLO,
  author =       "Qutaiba Alasad and Jiann-Shuin Yuan and Pramod
                 Subramanyan",
  title =        "Strong Logic Obfuscation with Low Overhead against
                 {IC} Reverse Engineering Attacks",
  journal =      j-TODAES,
  volume =       "25",
  number =       "4",
  pages =        "34:1--34:31",
  month =        sep,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398012",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 3 14:31:04 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398012",
  abstract =     "Untrusted foundries pose threats of integrated circuit
                 (IC) piracy and counterfeiting, and this has motivated
                 research into logic locking. Strong logic locking
                 approaches potentially prevent piracy and
                 counterfeiting by preventing unauthorized \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Alam:2020:SSB,
  author =       "Md Mahbub Alam and Adib Nahiyan and Mehdi Sadi and
                 Domenic Forte and Mark Tehranipoor",
  title =        "{Soft-HaT}: Software-Based Silicon Reprogramming for
                 Hardware {Trojan} Implementation",
  journal =      j-TODAES,
  volume =       "25",
  number =       "4",
  pages =        "35:1--35:22",
  month =        sep,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3396521",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Sep 3 14:31:04 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3396521",
  abstract =     "A hardware Trojan is a malicious modification to an
                 integrated circuit (IC) made by untrusted third-party
                 vendors, fabrication facilities, or rogue designers.
                 Although existing hardware Trojans are designed to be
                 stealthy, they can, in theory, be \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Henkel:2020:ISI,
  author =       "J{\"o}rg Henkel and Hussam Amrouch and Marilyn Wolf",
  title =        "Introduction to the Special Issue on Machine Learning
                 for {CAD}",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "36:1--36:2",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3410864",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3410864",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Szentimrey:2020:MLC,
  author =       "Hannah Szentimrey and Abeer Al-Hyari and Jeremy
                 Foxcroft and Timothy Martin and David Noel and Gary
                 Grewal and Shawki Areibi",
  title =        "Machine Learning for Congestion Management and
                 Routability Prediction within {FPGA} Placement",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "37:1--37:25",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3373269",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3373269",
  abstract =     "Placement for Field Programmable Gate Arrays (FPGAs)
                 is one of the most important but time-consuming steps
                 for achieving design closure. This article proposes the
                 integration of three unique machine learning models
                 into the state-of-the-art analytic \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2020:FGA,
  author =       "Mengyun Liu and Renjian Pan and Fangming Ye and Xin Li
                 and Krishnendu Chakrabarty and Xinli Gu",
  title =        "Fine-grained Adaptive Testing Based on Quality
                 Prediction",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "38:1--38:25",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3385261",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3385261",
  abstract =     "The ever-increasing complexity of integrated circuits
                 inevitably leads to high test cost. Adaptive testing
                 provides an effective solution for test-cost reduction;
                 this testing framework selects the important test items
                 for each set of chips. However, \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Last:2020:PMC,
  author =       "Felix Last and Max Haeberlein and Ulf Schlichtmann",
  title =        "Predicting Memory Compiler Performance Outputs Using
                 Feed-forward Neural Networks",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "39:1--39:19",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3385262",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3385262",
  abstract =     "Typical semiconductor chips include thousands of
                 mostly small memories. As memories contribute an
                 estimated 25\% to 40\% to the overall power,
                 performance, and area (PPA) of a product, memories must
                 be designed carefully to meet the system's
                 requirements. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Goli:2020:PAP,
  author =       "Mehran Goli and Rolf Drechsler",
  title =        "{PREASC}: Automatic Portion Resilience Evaluation for
                 Approximating {SystemC}-based Designs Using Regression
                 Analysis Techniques",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "40:1--40:28",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3388140",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3388140",
  abstract =     "The increasing functionality of electronic systems due
                 to the constant evolution of the market requirements
                 makes the non-functional aspects of such systems (e.g.,
                 energy consumption, area overhead, or performance) a
                 major concern in the design process. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Nasser:2020:NCM,
  author =       "Yehya Nasser and Carlo Sau and Jean-Christophe
                 Pr{\'e}votet and Tiziana Fanni and Francesca Palumbo
                 and Maryline H{\'e}lard and Luigi Raffo",
  title =        "{NeuPow}: a {CAD} Methodology for High-level Power
                 Estimation Based on Machine Learning",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "41:1--41:29",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3388141",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3388141",
  abstract =     "In this article, we present a new, simple, accurate,
                 and fast power estimation technique that can be used to
                 explore the power consumption of digital system designs
                 at an early design stage. We exploit the machine
                 learning techniques to aid the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Dey:2020:MLA,
  author =       "Sukanta Dey and Sukumar Nandi and Gaurav Trivedi",
  title =        "Machine Learning Approach for Fast Electromigration
                 Aware Aging Prediction in Incremental Design of Large
                 Scale On-chip Power Grid Network",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "42:1--42:29",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3399677",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3399677",
  abstract =     "With the advancement of technology nodes,
                 Electromigration (EM) signoff has become increasingly
                 difficult, which requires a considerable amount of time
                 for an incremental change in the power grid (PG)
                 network design in a chip. The traditional Black's
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2020:TSD,
  author =       "Qicheng Huang and Chenlei Fang and Soumya Mittal and
                 R. D. (Shawn) Blanton",
  title =        "Towards Smarter Diagnosis: a Learning-based Diagnostic
                 Outcome Previewer",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "43:1--43:20",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398267",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398267",
  abstract =     "Given the inherent perturbations during the
                 fabrication process of integrated circuits that lead to
                 yield loss, diagnosis of failing chips is a mitigating
                 method employed during both yield ramping and
                 high-volume manufacturing for yield learning.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Hu:2020:MLA,
  author =       "Yong Hu and Marcel Mettler and Daniel
                 Mueller-Gritschneder and Thomas Wild and Andreas
                 Herkersdorf and Ulf Schlichtmann",
  title =        "Machine Learning Approaches for Efficient Design Space
                 Exploration of Application-Specific {NoCs}",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "44:1--44:27",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3403584",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3403584",
  abstract =     "In many Multi-Processor Systems-on-Chip (MPSoCs),
                 traffic between cores is unbalanced. This motivates the
                 use of an application-specific Network-on-Chip (NoC)
                 that is customized and can provide a high performance
                 at low cost in terms of power and area. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2020:MFS,
  author =       "Yi Wang and Paul D. Franzon and David Smart and Brian
                 Swahn",
  title =        "Multi-Fidelity Surrogate-Based Optimization for
                 Electromagnetic Simulation Acceleration",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "45:1--45:21",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398268",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398268",
  abstract =     "As circuits' speed and frequency increase, fast and
                 accurate capture of the details of the parasitics in
                 metal structures, such as inductors and clock trees,
                 becomes more critical. However, conducting
                 high-fidelity 3D electromagnetic (EM) simulations
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Agnesina:2020:IFB,
  author =       "Anthony Agnesina and Sung Kyu Lim and Etienne Lepercq
                 and Jose {Escobedo Del Cid}",
  title =        "Improving {FPGA}-Based Logic Emulation Systems through
                 Machine Learning",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "46:1--46:20",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3399595",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3399595",
  abstract =     "We present a machine learning (ML) framework to
                 improve the use of computing resources in the FPGA
                 compilation step of a commercial FPGA-based logic
                 emulation flow. Our ML models enable highly accurate
                 predictability of the final place and route design
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Xama:2020:MLB,
  author =       "Nektar Xama and Martin Andraud and Jhon Gomez and
                 Baris Esen and Wim Dobbelaere and Ronny Vanhooren and
                 Anthony Coyette and Georges Gielen",
  title =        "Machine Learning-based Defect Coverage Boosting of
                 Analog Circuits under Measurement Variations",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "47:1--47:27",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408063",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408063",
  abstract =     "Safety-critical and mission-critical systems, such as
                 airplanes or (semi-)autonomous cars, are relying on an
                 ever-increasing number of embedded integrated circuits.
                 Consequently, there is a need for complete defect
                 coverage during the testing of these \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2020:APA,
  author =       "Kang Liu and Haoyu Yang and Yuzhe Ma and Benjamin Tan
                 and Bei Yu and Evangeline F. Y. Young and Ramesh Karri
                 and Siddharth Garg",
  title =        "Adversarial Perturbation Attacks on {ML}-based {CAD}:
                 a Case Study on {CNN}-based Lithographic Hotspot
                 Detection",
  journal =      j-TODAES,
  volume =       "25",
  number =       "5",
  pages =        "48:1--48:31",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408288",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Sat Oct 3 07:47:57 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408288",
  abstract =     "There is substantial interest in the use of machine
                 learning (ML)-based techniques throughout the
                 electronic computer-aided design (CAD) flow,
                 particularly those based on deep learning. However,
                 while deep learning methods have surpassed
                 state-of-the-art \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Hu:2020:EMN,
  author =       "X. Sharon Hu",
  title =        "Editorial: a Message from the New {Editor-in-Chief}",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "49e:1--49e:2",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3419376",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3419376",
  acknowledgement = ack-nhfb,
  articleno =    "49e",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Torabi:2020:LAA,
  author =       "Mohammad Torabi and Lihong Zhang",
  title =        "{LDE}-aware Analog Layout Migration with
                 {OPC}-inclusive Routing",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "49:1--49:22",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398190",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398190",
  abstract =     "Performance degradation in analog circuits due to
                 layout dependent effects (LDEs) has become increasingly
                 challenging in advanced technologies. To address this
                 issue, LDEs have to be seriously considered as
                 performance constraints in the physical design
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Ma:2020:MEF,
  author =       "Chenlin Ma and Yi Wang and Zhaoyan Shen and Renhai
                 Chen and Zhu Wang and Zili Shao",
  title =        "{MNFTL}: an Efficient Flash Translation Layer for {MLC
                 NAND} Flash Memory",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "50:1--50:19",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398037",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398037",
  abstract =     "The write constraints of Multi-Level Cell (MLC) NAND
                 flash memory make most of the existing flash
                 translation layer (FTL) schemes inefficient or
                 inapplicable. In this article, we solve several
                 fundamental problems in the design of MLC flash
                 translation \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Lezos:2020:LOL,
  author =       "Christakis Lezos and Grigoris Dimitroulakos and
                 Ioannis Latifis and Konstantinos Masselos",
  title =        "A Locality Optimizer for Loop-dominated Applications
                 Based on Reuse Distance Analysis",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "51:1--51:26",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3398189",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398189",
  abstract =     "Source code optimization can heavily improve software
                 code implementation quality while still being
                 complementary to conventional compilers' optimizations.
                 Source code analysis tools are very useful in
                 supporting source code optimization. This article
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Tan:2020:EEG,
  author =       "Jingweijia Tan and Kaige Yan and Shuaiwen Leon Song
                 and Xin Fu",
  title =        "Energy-Efficient {GPU} {L2} Cache Design Using
                 Instruction-Level Data Locality Similarity",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "52:1--52:18",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408060",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408060",
  abstract =     "This article presents a novel energy-efficient cache
                 design for massively parallel, throughput-oriented
                 architectures like GPUs. Unlike L1 data cache on modern
                 GPUs, L2 cache shared by all of the streaming
                 multiprocessors is not the primary performance
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Charles:2020:RNC,
  author =       "Subodha Charles and Prabhat Mishra",
  title =        "Reconfigurable Network-on-Chip Security Architecture",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "53:1--53:25",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3406661",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3406661",
  abstract =     "Growth of the Internet-of-things has led to complex
                 system-on-chips (SoCs) being used in the edge devices
                 in IoT applications. The increased complexity is
                 demanding designers to consider several critical
                 factors, such as dynamic requirement changes,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Pendyala:2020:IAS,
  author =       "Shilpa Pendyala and Sheikh Ariful Islam and Srinivas
                 Katkoori",
  title =        "Interval Arithmetic and Self-Similarity Based {RTL}
                 Input Vector Control for Datapath Leakage
                 Minimization",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "54:1--54:26",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408061",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408061",
  abstract =     "With technology scaling, subthreshold leakage has
                 dominated the overall power consumption in a
                 design. Input vector control is an effective technique
                 to minimize subthreshold leakage. Low leakage input
                 vector determination is not often possible due to large
                 design space and simulation time. Similarly, applying
                 an appropriate minimum leakage vector (MLV) to each
                 Register Transfer Level (RTL) module instance in a
                 design often results in a low leakage state with
                 significant area overhead. In this work, we propose a
                 top-down and bottom-up approach for propagating the
                 input vector interval to identify low leakage input
                 vector at primary inputs of an RTL datapath. For each
                 module, via Monte Carlo simulation, we identify a set
                 of MLV intervals such that maximum leakage is within
                 (say) 10\% of the lowest leakage points. As the module
                 bit width increases, exhaustive simulation to find the
                 low leakage vector is not feasible. Further, we need to
                 uniformly search the entire input space to obtain as
                 many low leakage intervals as possible. Based on
                 empirical observations, we observe self-similarity in
                 the subthreshold leakage distribution of adder\slash
                 multiplier modules with highly regular bit-slice
                 architectures when input space is partitioned into
                 smaller cells. This property enables the uniform search
                 of low leakage vectors in the entire input space where
                 the time taken for characterization increases linearly
                 with the module size. We further process the reduced
                 interval set with simulated annealing to arrive at the
                 best low-leakage vector at the primary inputs. We also
                 propose to reduce area overhead (in some cases to 0\%)
                 by choosing Primary Input (PI) MLVs such that resultant
                 inputs to internal nodes are also MLVs. Compared to
                 existing work, experimental results for DSP filters
                 simulated in 16nm technology demonstrated leakage
                 savings of 93.6\% and 89.2\% for top-down and bottom-up
                 approaches with no area overhead.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Chi:2020:WLO,
  author =       "Hao Yu Chi and Chien Nan Jimmy Liu and Hung Ming
                 Chen",
  title =        "Wire Load Oriented Analog Routing with Matching
                 Constraints",
  journal =      j-TODAES,
  volume =       "25",
  number =       "6",
  pages =        "55:1--55:26",
  month =        oct,
  year =         "2020",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3403932",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Oct 13 08:53:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3403932",
  abstract =     "As design complexity is increased exponentially,
                 electronic design automation (EDA) tools are essential
                 to reduce design efforts. However, the analog layout
                 design has still been done manually for decades because
                 it is a sensitive and error-prone task. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Goel:2021:MNN,
  author =       "Abhinav Goel and Sara Aghajanzadeh and Caleb Tung and
                 Shuo-Han Chen and George K. Thiruvathukal and
                 Yung-Hsiang Lu",
  title =        "Modular Neural Networks for Low-Power Image
                 Classification on Embedded Devices",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "1:1--1:35",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3408062",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408062",
  abstract =     "Embedded devices are generally small, battery-powered
                 computers with limited hardware resources. It is
                 difficult to run deep neural networks (DNNs) on these
                 devices, because DNNs perform millions of operations
                 and consume significant amounts of energy. \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Roy:2021:FAA,
  author =       "Indrani Roy and Chester Rebeiro and Aritra Hazra and
                 Swarup Bhunia",
  title =        "{FaultDroid}: an Algorithmic Approach for
                 Fault-Induced Information Leakage Analysis",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "2:1--2:27",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3410336",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3410336",
  abstract =     "Fault attacks belong to a potent class of
                 implementation-based attacks that can compromise a
                 crypto-device within a few milliseconds. Out of the
                 large numbers of faults that can occur in the device,
                 only a very few are exploitable in terms of leaking
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Li:2021:MNI,
  author =       "Jun Li and Bowen Huang and Zhibing Sha and Zhigang Cai
                 and Jianwei Liao and Balazs Gerofi and Yutaka
                 Ishikawa",
  title =        "Mitigating Negative Impacts of Read Disturb in
                 {SSDs}",
  journal =      j-TODAES,
  volume =       "26",
  number =       "1",
  pages =        "3:1--3:24",
  month =        jan,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3410332",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 25 10:17:13 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3410332",
  abstract =     "Read disturb is a circuit-level noise in solid-state
                 drives (SSDs), which may corrupt existing data in SSD
                 blocks and then cause high read error rate and longer
                 read latency. The approach of read refresh is commonly
                 used to avoid read disturb errors by \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Mondal:2021:IFS,
author = "Ankit Mondal and Ankur Srivastava",
title = "{Ising-FPGA}: a Spintronics-based Reconfigurable
{Ising} Model Solver",
journal = j-TODAES,
volume = "26",
number = "1",
pages = "4:1--4:27",
month = jan,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3411511",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:13 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3411511",
abstract = "The Ising model has been explored as a framework for
modeling NP-hard problems, with several diverse systems
proposed to solve it. The Magnetic Tunnel Junction-
(MTJ) based Magnetic RAM is capable of replacing CMOS
in memory chips. In this article, we \ldots{}",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Rokni:2021:TMS,
author = "Seyed Ali Rokni and Marjan Nourollahi and Parastoo
Alinia and Iman Mirzadeh and Mahdi Pedram and Hassan
Ghasemzadeh",
title = "{TransNet}: Minimally Supervised Deep Transfer
Learning for Dynamic Adaptation of Wearable Systems",
journal = j-TODAES,
volume = "26",
number = "1",
pages = "5:1--5:31",
month = jan,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3414062",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:13 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3414062",
abstract = "Wearables are poised to transform health and wellness
through automation of cost-effective, objective, and
real-time health monitoring. However, machine learning
models for these systems are designed based on labeled
data collected, and feature \ldots{}",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Islam:2021:HLS,
author = "Sheikh Ariful Islam and Love Kumar Sah and Srinivas
Katkoori",
title = "High-Level Synthesis of Key-Obfuscated {RTL IP} with
Design Lockout and Camouflaging",
journal = j-TODAES,
volume = "26",
number = "1",
pages = "6:1--6:35",
month = jan,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3410337",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:13 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3410337",
abstract = "We propose three orthogonal techniques to secure
Register-Transfer-Level (RTL) Intellectual Property
(IP). In the first technique, the key-based RTL
obfuscation scheme is proposed at an early design phase
during High-Level Synthesis (HLS). Given a \ldots{}",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Poddar:2021:RMT,
author = "Sudip Poddar and Tapalina Banerjee and Robert Wille
and Bhargab B. Bhattacharya",
title = "Robust Multi-Target Sample Preparation on {MEDA}
Biochips Obviating Waste Production",
journal = j-TODAES,
volume = "26",
number = "1",
pages = "7:1--7:29",
month = jan,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3414061",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:13 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3414061",
abstract = "Digital microfluidic biochips have fueled a paradigm
shift in implementing bench-top laboratory experiments
on a single tiny chip, thus replacing costly and bulky
equipment. However, because of imprecise fluidic
functions, several volumetric split \ldots{}",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhang:2021:DPR,
author = "Ying Zhang and Xinpeng Hong and Zhongsheng Chen and
Zebo Peng and Jianhui Jiang",
title = "A Deterministic-Path Routing Algorithm for Tolerating
Many Faults on Very-Large-Scale Network-on-Chip",
journal = j-TODAES,
volume = "26",
number = "1",
pages = "8:1--8:26",
month = jan,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3414060",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:13 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3414060",
abstract = "Very-large-scale network-on-chip (VLS-NoC) has become
a promising fabric for supercomputers, but this fabric
may encounter the many-fault problem. This article
proposes a deterministic routing algorithm to tolerate
the effects of many faults in VLS-NoCs \ldots{}",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Goncalves:2021:SAT,
author = "St{\`e}phano M. M. Gon{\c{c}}alves and da Rosa, Jr.,
Leomar S. and Felipe S. Marques",
title = "{SmartDR}: Algorithms and Techniques for Fast Detailed
Routing with Good Design Rule Handling",
journal = j-TODAES,
volume = "26",
number = "2",
pages = "9:1--9:38",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3417133",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:14 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3417133",
abstract = "Detailed routing is one of the most time-consuming
steps of physical synthesis of integrated circuits.
Also, it is very challenging due to the complexity of
the design rules that the router must obey. In this
article, we present SmartDR, a detailed \ldots{}",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Liao:2021:EPA,
author = "Tuotian Liao and Lihong Zhang",
title = "Efficient Parasitic-aware {$ g_m / I_D $}-based
Hybrid Sizing Methodology for Analog and {RF}
Integrated Circuits",
journal = j-TODAES,
volume = "26",
number = "2",
pages = "10:1--10:31",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3416946",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:14 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3416946",
abstract = "As the primary second-order effect, parasitic issues
have to be seriously addressed when synthesizing
high-performance analog and RF integrated circuits
(ICs). In this article, a two-phase hybrid sizing
methodology for analog and RF ICs is proposed to
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wu:2021:CPO,
author = "Nan Wu and Lei Deng and Guoqi Li and Yuan Xie",
title = "Core Placement Optimization for Multi-chip Many-core
Neural Network Systems with Reinforcement Learning",
journal = j-TODAES,
volume = "26",
number = "2",
pages = "11:1--11:27",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3418498",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:14 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3418498",
abstract = "Multi-chip many-core neural network systems are
capable of providing high parallelism benefited from
decentralized execution, and they can be scaled to very
large systems with reasonable fabrication costs. As
multi-chip many-core systems scale up, \ldots{}",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Siddhu:2021:LAD,
author = "Lokesh Siddhu and Rajesh Kedia and Preeti Ranjan
Panda",
title = "Leakage-Aware Dynamic Thermal Management of {$3$D}
Memories",
journal = j-TODAES,
volume = "26",
number = "2",
pages = "12:1--12:31",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3419468",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:14 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3419468",
abstract = "3D memory systems offer several advantages in terms of
area, bandwidth, and energy efficiency. However,
thermal issues arising out of higher power densities
have limited their widespread use. While prior works
have looked at reducing dynamic power \ldots{}",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ghosh:2021:PDP,
author = "Sumana Ghosh and Soumyajit Dey and Pallab Dasgupta",
title = "Performance-Driven Post-Processing of Control Loop
Execution Schedules",
journal = j-TODAES,
volume = "26",
number = "2",
pages = "13:1--13:27",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3421505",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:14 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3421505",
abstract = "The increasing demand for mapping diverse embedded
features onto shared electronic control units has
brought about novel ways to co-design control tasks and
their schedules. These techniques replace traditional
implementations of control with new \ldots{}",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Luo:2021:TMF,
author = "Yingyi Luo and Joshua C. Zhao and Arnav Aggarwal and
Seda Ogrenci-Memik and Kazutomo Yoshii",
title = "Thermal Management for {FPGA} Nodes in {HPC} Systems",
journal = j-TODAES,
volume = "26",
number = "2",
pages = "14:1--14:17",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3423494",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:14 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3423494",
abstract = "The integration of FPGAs into large-scale computing
systems is gaining attention. In these systems,
real-time data handling for networking, tasks for
scientific computing, and machine learning can be
executed with customized datapaths on reconfigurable
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2021:RMB,
author = "Jianli Chen and Ziran Zhu and Wenxing Zhu and Yao-Wen
Chang",
title = "A Robust Modulus-Based Matrix Splitting Iteration
Method for Mixed-Cell-Height Circuit Legalization",
journal = j-TODAES,
volume = "26",
number = "2",
pages = "15:1--15:28",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3423326",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:14 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3423326",
abstract = "Modern circuits often contain standard cells of
different row heights to meet various design
requirements. Taller cells give larger drive strengths
and higher speed at the cost of larger areas and power.
Multi-row height standard cells incur challenging
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Arka:2021:HHM,
author = "Aqeeb Iqbal Arka and Biresh Kumar Joardar and Ryan
Gary Kim and Dae Hyun Kim and Janardhan Rao Doppa and
Partha Pratim Pande",
title = "{HeM$3$D}: Heterogeneous Manycore Architecture Based
on Monolithic {$3$D} Vertical Integration",
journal = j-TODAES,
volume = "26",
number = "2",
pages = "16:1--16:21",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3424239",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:14 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3424239",
abstract = "Heterogeneous manycore architectures are the key to
efficiently execute compute- and data-intensive
applications. Through-silicon-via (TSV)-based 3D
manycore system is a promising solution in this
direction as it enables the integration of disparate
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Deb:2021:CRC,
author = "Dipika Deb and John Jose and Maurizio Palesi",
title = "{COPE}: Reducing Cache Pollution and Network
Contention by Inter-tile Coordinated Prefetching in
{NoC}-based {MPSoCs}",
journal = j-TODAES,
volume = "26",
number = "3",
pages = "17:1--17:31",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3428149",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:15 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3428149",
abstract = "Prefetching helps in reducing the memory access
latency in multi-banked NUCA architecture, where the
Last Level Cache (LLC) is shared. In such systems, an
application running on core generates significant
traffic on the shared resources, the underlying
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Letras:2021:MOO,
author = "Martin Letras and Joachim Falk and Tobias Schwarzer
and J{\"u}rgen Teich",
title = "Multi-objective Optimization of Mapping Dataflow
Applications to {MPSoCs} Using a Hybrid Evaluation
Combining Analytic Models and Measurements",
journal = j-TODAES,
volume = "26",
number = "3",
pages = "18:1--18:33",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3431814",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:15 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3431814",
abstract = "Dataflow modeling is well suited for a large variety
of applications for modern multi-core architectures,
e.g., from the signal processing and the control
domain. Furthermore, Design Space Exploration (DSE) can
be used to explore mappings of tasks to \ldots{}",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pomeranz:2021:LDH,
author = "Irith Pomeranz and M. Enamul Amyeen",
title = "Logic Diagnosis with Hybrid Fail Data",
journal = j-TODAES,
volume = "26",
number = "3",
pages = "19:1--19:13",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3433929",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:15 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3433929",
abstract = "Yield improvement requires information about the
defects present in faulty units. This information is
derived by applying a logic diagnosis procedure to the
fail data collected by a tester from faulty units. It
is typical in the early stages of yield \ldots{}",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ince:2021:FBB,
author = "Mehmet Ince and Ender Yilmaz and Wei Fu and Joonsung
Park and Krishnaswamy Nagaraj and Leroy Winemberg and
Sule Ozev",
title = "Fault-based Built-in Self-test and Evaluation of Phase
Locked Loops",
journal = j-TODAES,
volume = "26",
number = "3",
pages = "20:1--20:18",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3427911",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:15 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3427911",
abstract = "With the increasing pressure to obtain near-zero
defect rates for the automotive industry, there is a
need to explore built-in self-test and other
non-traditional test techniques for embedded
mixed-signal components, such as PLLs, DC-DC
converters, and \ldots{}",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Gebregirogis:2021:ALF,
author = "Anteneh Gebregiorgis and Mehdi Tahoori",
title = "Approximate Learning and Fault-Tolerant Mapping for
Energy-Efficient Neuromorphic Systems",
journal = j-TODAES,
volume = "26",
number = "3",
pages = "21:1--21:23",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3436491",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:15 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3436491",
abstract = "Brain-inspired deep neural networks such as
Convolutional Neural Network (CNN) have shown great
potential in solving difficult cognitive problems such
as object recognition and classification. However, such
architectures have high computational energy \ldots{}",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lyu:2021:MSC,
author = "Yangdi Lyu and Prabhat Mishra",
title = "{MaxSense}: Side-channel Sensitivity Maximization for
{Trojan} Detection Using Statistical Test Patterns",
journal = j-TODAES,
volume = "26",
number = "3",
pages = "22:1--22:21",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3436820",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:15 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3436820",
abstract = "Detection of hardware Trojans is vital to ensure the
security and trustworthiness of System-on-Chip (SoC)
designs. Side-channel analysis is effective for Trojan
detection by analyzing various side-channel signatures
such as power, current, and delay. In \ldots{}",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pomeranz:2021:CTH,
author = "Irith Pomeranz",
title = "Covering Test Holes of Functional Broadside Tests",
journal = j-TODAES,
volume = "26",
number = "3",
pages = "23:1--23:15",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3441282",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:15 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3441282",
abstract = "Functional broadside tests were developed to avoid
overtesting of delay faults. The tests achieve this
goal by creating functional operation conditions during
their functional capture cycles. To increase the
achievable fault coverage, close-to-functional \ldots{}",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Roy:2021:MLS,
author = "Urmimala Roy and Tanmoy Pramanik and Subhendu Roy and
Avhishek Chatterjee and Leonard F. Register and Sanjay
K. Banerjee",
title = "Machine Learning for Statistical Modeling: The Case of
Perpendicular Spin-Transfer-Torque Random Access
Memory",
journal = j-TODAES,
volume = "26",
number = "3",
pages = "24:1--24:17",
month = feb,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3440014",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 25 10:17:15 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3440014",
abstract = "We propose a methodology to perform process
variation-aware device and circuit design using fully
physics-based simulations within limited computational
resources, without developing a compact model. Machine
learning (ML), specifically a support vector \ldots{}",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pomeranz:2021:EFU,
author = "Irith Pomeranz",
title = "Equivalent Faults under Launch-on-Shift {(LOS)} Tests
with Equal Primary Input Vectors",
journal = j-TODAES,
volume = "26",
number = "4",
pages = "25:1--25:15",
month = apr,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3440013",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Apr 27 08:06:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3440013",
abstract = "A recent work showed that it is possible to transform
a single-cycle test for stuck-at faults into a
launch-on-shift (LOS) test that is guaranteed to detect
the same stuck-at faults without any logic or fault
simulation. The LOS test also detects \ldots{}",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Witharana:2021:DTG,
author = "Hasini Witharana and Yangdi Lyu and Prabhat Mishra",
title = "Directed Test Generation for Activation of Security
Assertions in {RTL} Models",
journal = j-TODAES,
volume = "26",
number = "4",
pages = "26:1--26:28",
month = apr,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3441297",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Apr 27 08:06:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3441297",
abstract = "Assertions are widely used for functional validation
as well as coverage analysis for both software and
hardware designs. Assertions enable runtime error
detection as well as faster localization of errors.
While there is a vast literature on both \ldots{}",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Mohammadzadeh:2021:EOP,
author = "Naser Mohammadzadeh and Robert Wille and Oliver
Keszocze",
title = "Efficient One-pass Synthesis for Digital Microfluidic
Biochips",
journal = j-TODAES,
volume = "26",
number = "4",
pages = "27:1--27:21",
month = apr,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3446880",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Apr 27 08:06:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3446880",
abstract = "Digital microfluidics biochips are a promising
emerging technology that provides fluidic experimental
capabilities on a chip (i.e., following the
lab-on-a-chip paradigm). However, the design of such
biochips still constitutes a challenging task that is
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Jain:2021:TTA,
author = "Ayush Jain and Ziqi Zhou and Ujjwal Guin",
title = "{TAAL}: Tampering Attack on Any Key-based Logic Locked
Circuits",
journal = j-TODAES,
volume = "26",
number = "4",
pages = "28:1--28:22",
month = apr,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3442379",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Apr 27 08:06:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3442379",
abstract = "Due to the globalization of semiconductor
manufacturing and test processes, the system-on-a-chip
(SoC) designers no longer design the complete SoC and
manufacture chips on their own. This outsourcing of the
design and manufacturing of Integrated \ldots{}",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Rahman:2021:SAD,
author = "M. Sazadur Rahman and Adib Nahiyan and Fahim Rahman
and Saverio Fazzari and Kenneth Plaks and Farimah
Farahmandi and Domenic Forte and Mark Tehranipoor",
title = "Security Assessment of Dynamically Obfuscated Scan
Chain Against Oracle-guided Attacks",
journal = j-TODAES,
volume = "26",
number = "4",
pages = "29:1--29:27",
month = apr,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3444960",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Apr 27 08:06:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3444960",
abstract = "Logic locking has emerged as a promising solution to
protect integrated circuits against piracy and
tampering. However, the security provided by existing
logic locking techniques is often thwarted by Boolean
satisfiability (SAT)-based oracle-guided \ldots{}",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sinha:2021:DSO,
author = "Mitali Sinha and Gade Sri Harsha and Pramit
Bhattacharyya and Sujay Deb",
title = "Design Space Optimization of Shared Memory
Architecture in Accelerator-rich Systems",
journal = j-TODAES,
volume = "26",
number = "4",
pages = "30:1--30:31",
month = apr,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3446001",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Apr 27 08:06:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3446001",
abstract = "Shared memory architectures, as opposed to
private-only memories, provide a viable alternative to
meet the ever-increasing memory requirements of
multi-accelerator systems to achieve high performance
under stringent area and energy constraints. However,
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Palchaudhuri:2021:DAT,
author = "Ayan Palchaudhuri and Sandeep Sharma and Anindya
Sundar Dhar",
title = "Design Automation for Tree-based Nearest
Neighborhood-aware Placement of High-speed Cellular
Automata on {FPGA} with Scan Path Insertion",
journal = j-TODAES,
volume = "26",
number = "4",
pages = "31:1--31:34",
month = apr,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3446206",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Apr 27 08:06:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3446206",
abstract = "Cellular Automata (CA) is attractive for high-speed
VLSI implementation due to modularity, cascadability,
and locality of interconnections confined to
neighboring logic cells. However, this outcome is not
easily transferable to tree-structured CA, since
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Yellu:2021:STA,
author = "Pruthvy Yellu and Landon Buell and Miguel Mark and
Michel A. Kinsy and Dongpeng Xu and Qiaoyan Yu",
title = "Security Threat Analyses and Attack Models for
Approximate Computing Systems: From Hardware and
Micro-architecture Perspectives",
journal = j-TODAES,
volume = "26",
number = "4",
pages = "32:1--32:31",
month = apr,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3442380",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Apr 27 08:06:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3442380",
abstract = "Approximate computing (AC) represents a paradigm shift
from conventional precise processing to inexact
computation but still satisfying the system requirement
on accuracy. The rapid progress on the development of
diverse AC techniques allows us to apply \ldots{}",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Jayasinghe:2021:QQB,
author = "Darshana Jayasinghe and Aleksandar Ignjatovic and
Roshan Ragel and Jude Angelo Ambrose and Sri
Parameswaran",
title = "{QuadSeal}: Quadruple Balancing to Mitigate Power
Analysis Attacks with Variability Effects and
Electromagnetic Fault Injection Attacks",
journal = j-TODAES,
volume = "26",
number = "5",
pages = "33:1--33:36",
month = jun,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3443706",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 22 08:18:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3443706",
abstract = "Side channel analysis attacks employ the emanated side
channel information to deduce the secret keys from
cryptographic implementations by analyzing the power
traces during execution or scrutinizing faulty outputs.
To be effective, a countermeasure must \ldots{}",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wu:2021:DHC,
author = "Chin-Hsien Wu and Hao-Wei Zhang and Chia-Wei Liu and
Ta-Ching Yu and Chi-Yen Yang",
title = "A Dynamic {Huffman} Coding Method for Reliable {TLC
NAND} Flash Memory",
journal = j-TODAES,
volume = "26",
number = "5",
pages = "34:1--34:25",
month = jun,
year = "2021",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3446771",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Jun 22 08:18:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3446771",
abstract = "With the progress of the manufacturing process, NAND
flash memory has evolved from the single-level cell and
multi-level cell into the triple-level cell (TLC). NAND
flash memory has physical problems such as the
characteristic of erase-before-write and \ldots{}",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Jeong:2021:DMB,
  author =       "Eunjin Jeong and Dowhan Jeong and Soonhoi Ha",
  title =        "Dataflow Model-based Software Synthesis Framework for
                 Parallel and Distributed Embedded Systems",
  journal =      j-TODAES,
  volume =       "26",
  number =       "5",
  pages =        "35:1--35:38",
  month =        jun,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3447680",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 22 08:18:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3447680",
  abstract =     "Existing software development methodologies mostly
                 assume that an application runs on a single device
                 without concern about the non-functional requirements
                 of an embedded system such as latency and resource
                 consumption. Besides, embedded software is \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Xie:2021:DFM,
  author =       "Guoqi Xie and Hao Peng and Xiongren Xiao and Yao Liu
                 and Renfa Li",
  title =        "Design Flow and Methodology for Dynamic and Static
                 Energy-constrained Scheduling Framework in
                 Heterogeneous Multicore Embedded Devices",
  journal =      j-TODAES,
  volume =       "26",
  number =       "5",
  pages =        "36:1--36:18",
  month =        jun,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3450448",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 22 08:18:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3450448",
  abstract =     "With Internet of things technologies, billions of
                 embedded devices, including smart gateways, smart
                 phones, and mobile robots, are connected and deeply
                 integrated. Almost all these embedded devices are
                 battery-constrained and energy-limited systems. In
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Park:2021:PPD,
  author =       "Heechun Park and Bon Woong Ku and Kyungwook Chang and
                 Da Eun Shim and Sung Kyu Lim",
  title =        "Pseudo-{$3$D} Physical Design Flow for Monolithic
                 {$3$D} {ICs}: Comparisons and Enhancements",
  journal =      j-TODAES,
  volume =       "26",
  number =       "5",
  pages =        "37:1--37:25",
  month =        jun,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3453480",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 22 08:18:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3453480",
  abstract =     "Studies have shown that monolithic 3D (M3D) ICs
                 outperform the existing through-silicon-via
                 (TSV)-based 3D ICs in terms of power, performance, and
                 area (PPA) metrics, primarily due to the orders of
                 magnitude denser vertical interconnections offered by
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Hassanpourghadi:2021:MLG,
  author =       "Mohsen Hassanpourghadi and Rezwan A. Rasul and Mike
                 Shuo-Wei Chen",
  title =        "A Module-Linking Graph Assisted Hybrid Optimization
                 Framework for Custom Analog and Mixed-Signal Circuit
                 Parameter Synthesis",
  journal =      j-TODAES,
  volume =       "26",
  number =       "5",
  pages =        "38:1--38:22",
  month =        jun,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3456722",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 22 08:18:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3456722",
  abstract =     "Analog and mixed-signal (AMS) computer-aided design
                 tools are of increasing interest owing to demand for
                 the wide range of AMS circuit specifications in the
                 modern system on a chip and faster time to market
                 requirement. Traditionally, to accelerate the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Feng:2021:FRT,
  author =       "Lang Feng and Jeff Huang and Jiang Hu and Abhijith
                 Reddy",
  title =        "{FastCFI}: Real-time Control-Flow Integrity Using
                 {FPGA} without Code Instrumentation",
  journal =      j-TODAES,
  volume =       "26",
  number =       "5",
  pages =        "39:1--39:39",
  month =        jun,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3458471",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 22 08:18:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3458471",
  abstract =     "Control-Flow Integrity (CFI) is an effective defense
                 technique against a variety of memory-based cyber
                 attacks. CFI is usually enforced through software
                 methods, which entail considerable performance
                 overhead. Hardware-based CFI techniques can largely
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2021:MLE,
  author =       "Guyue Huang and Jingbo Hu and Yifan He and Jialong Liu
                 and Mingyuan Ma and Zhaoyang Shen and Juejian Wu and
                 Yuanfan Xu and Hengrui Zhang and Kai Zhong and Xuefei
                 Ning and Yuzhe Ma and Haoyu Yang and Bei Yu and
                 Huazhong Yang and Yu Wang",
  title =        "Machine Learning for Electronic Design Automation: a
                 Survey",
  journal =      j-TODAES,
  volume =       "26",
  number =       "5",
  pages =        "40:1--40:46",
  month =        jun,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3451179",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Tue Jun 22 08:18:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3451179",
  abstract =     "With the down-scaling of CMOS technology, the design
                 complexity of very large-scale integrated is
                 increasing. Although the application of machine
                 learning (ML) techniques in electronic design
                 automation (EDA) can trace its history back to the
                 1990s, the \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Chattopadhyay:2021:CCP,
  author =       "Saranyu Chattopadhyay and Pranesh Santikellur and
                 Rajat Subhra Chakraborty and Jimson Mathew and Marco
                 Ottavi",
  title =        "A Conditionally Chaotic Physically Unclonable Function
                 Design Framework with High Reliability",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "41:1--41:24",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460004",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460004",
  abstract =     "Physically Unclonable Function (PUF) circuits are
                 promising low-overhead hardware security primitives,
                 but are often gravely susceptible to machine
                 learning-based modeling attacks. Recently, chaotic PUF
                 circuits have been proposed that show greater
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Jiang:2021:PDM,
  author =       "Chen Jiang and Bo Yuan and Tsung-Yi Ho and Xin Yao",
  title =        "Placement of Digital Microfluidic Biochips via a New
                 Evolutionary Algorithm",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "42:1--42:22",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460230",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460230",
  abstract =     "Digital microfluidic biochips (DMFBs) have been a
                 revolutionary platform for automating and miniaturizing
                 laboratory procedures with the advantages of
                 flexibility and reconfigurability. The placement
                 problem is one of the most challenging issues in the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Gnad:2021:VBC,
  author =       "Dennis R. E. Gnad and Cong Dang Khoa Nguyen and Syed
                 Hashim Gillani and Mehdi B. Tahoori",
  title =        "Voltage-Based Covert Channels Using {FPGAs}",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "43:1--43:25",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460229",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460229",
  abstract =     "Field Programmable Gate Arrays (FPGAs) are
                 increasingly used in cloud applications and being
                 integrated into Systems-on-Chip. For these systems,
                 various side-channel attacks on cryptographic
                 implementations have been reported, motivating one to
                 apply \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Ning:2021:FND,
  author =       "Xuefei Ning and Guangjun Ge and Wenshuo Li and Zhenhua
                 Zhu and Yin Zheng and Xiaoming Chen and Zhen Gao and Yu
                 Wang and Huazhong Yang",
  title =        "{FTT-NAS}: Discovering Fault-tolerant Convolutional
                 Neural Architecture",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "44:1--44:24",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460288",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460288",
  abstract =     "With the fast evolvement of embedded deep-learning
                 computing systems, applications powered by deep
                 learning are moving from the cloud to the edge. When
                 deploying neural networks (NNs) onto the devices under
                 complex environments, there are various types of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Lu:2021:RRD,
  author =       "Anni Lu and Xiaochen Peng and Yandong Luo and Shanshi
                 Huang and Shimeng Yu",
  title =        "A Runtime Reconfigurable Design of
                 Compute-in-Memory-Based Hardware Accelerator for Deep
                 Learning Inference",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "45:1--45:18",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460436",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460436",
  abstract =     "Compute-in-memory (CIM) is an attractive solution to
                 address the ``memory wall'' challenges for the
                 extensive computation in deep learning hardware
                 accelerators. For custom ASIC design, a specific chip
                 instance is restricted to a specific network during
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Roy:2021:FVS,
  author =       "Pushpita Roy and Ansuman Banerjee",
  title =        "A Framework for Validation of Synthesized
                 {MicroElectrode} Dot Array Actuations for Digital
                 Microfluidic Biochips",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "46:1--46:36",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460437",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460437",
  abstract =     "Digital Microfluidics is an emerging technology for
                 automating laboratory procedures in biochemistry. With
                 more and more complex biochemical protocols getting
                 mapped to biochip devices and microfluidics receiving a
                 wide adoption, it is becoming \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Li:2021:VAH,
  author =       "Xi Li and Soheil Nazar Shahsavani and Xuan Zhou and
                 Massoud Pedram and Peter A. Beerel",
  title =        "A Variation-aware Hold Time Fixing Methodology for
                 Single Flux Quantum Logic Circuits",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "47:1--47:17",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460289",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460289",
  abstract =     "Single flux quantum (SFQ) logic is a promising
                 technology to replace complementary
                 metal-oxide-semiconductor logic for future exa-scale
                 supercomputing but requires the development of reliable
                 EDA tools that are tailored to the unique
                 characteristics of \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Park:2021:HTN,
  author =       "Naebeom Park and Sungju Ryu and Jaeha Kung and
                 Jae-Joon Kim",
  title =        "High-throughput Near-Memory Processing on {CNNs} with
                 {$3$D} {HBM}-like Memory",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "48:1--48:20",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460971",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460971",
  abstract =     "This article discusses the high-performance
                 near-memory neural network (NN) accelerator
                 architecture utilizing the logic die in
                 three-dimensional (3D) High Bandwidth Memory- (HBM)
                 like memory. As most of the previously reported 3D
                 memory-based near-memory \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Maleki:2021:EEI,
  author =       "Mohammad-Ali Maleki and Alireza Nabipour-Meybodi and
                 Mehdi Kamal and Ali Afzali-Kusha and Massoud Pedram",
  title =        "An Energy-Efficient Inference Method in Convolutional
                 Neural Networks Based on Dynamic Adjustment of the
                 Pruning Level",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "49:1--49:20",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3460972",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3460972",
  abstract =     "In this article, we present a low-energy inference
                 method for convolutional neural networks in image
                 classification applications. The lower energy
                 consumption is achieved by using a highly pruned
                 (lower-energy) network if the resulting network can
                 provide \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Lin:2021:DAS,
  author =       "Dave Y.-W. Lin and Charles H.-P. Wen",
  title =        "A Delay-Adjustable, Self-Testable Flip-Flop for
                 Soft-Error Tolerability and Delay-Fault Testability",
  journal =      j-TODAES,
  volume =       "26",
  number =       "6",
  pages =        "50:1--50:12",
  month =        nov,
  year =         "2021",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3462171",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Aug 19 08:44:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3462171",
  abstract =     "As the demand of safety-critical applications (e.g.,
                 automobile electronics) increases, various
                 radiation-hardened flip-flops are proposed for
                 enhancing design reliability. Among all flip-flops,
                 Delay-Adjustable D-Flip-Flop (DAD-FF) is specialized in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Polychronou:2022:CSA,
  author =       "Nikolaos-Foivos Polychronou and Pierre-Henri Thevenon
                 and Maxime Puys and Vincent Beroulle",
  title =        "A Comprehensive Survey of Attacks without Physical
                 Access Targeting Hardware Vulnerabilities in {IoT\slash
                 IIoT} Devices, and Their Detection Mechanisms",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "1:1--1:35",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3471936",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3471936",
  abstract =     "With the advances in the field of the Internet of
                 Things (IoT) and Industrial IoT (IIoT), these devices
                 are increasingly used in daily life or industry. To
                 reduce costs related to the time required to develop
                 these devices, security features are usually \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Gade:2022:NHC,
  author =       "Sri Harsha Gade and Sujay Deb",
  title =        "A Novel Hybrid Cache Coherence with Global Snooping
                 for Many-core Architectures",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "2:1--2:31",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3462775",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3462775",
  abstract =     "Cache coherence ensures correctness of cached data in
                 multi-core processors. Traditional implementations of
                 existing protocols make them unscalable for many core
                 architectures. While snoopy coherence requires
                 unscalable ordered networks, directory \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Han:2022:EEF,
  author =       "Ding Han and Guohui Li and Quan Zhou and Jianjun Li
                 and Yong Yang and Xiaofei Hu",
  title =        "An Efficient Execution Framework of Two-Part Execution
                 Scenario Analysis",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "3:1--3:24",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3465474",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3465474",
  abstract =     "Response Time Analysis (RTA) is an important and
                 promising technique for analyzing the schedulability of
                 real-time tasks under both Global Fixed-Priority (G-FP)
                 scheduling and Global Earliest Deadline First (G-EDF)
                 scheduling. Most existing RTA methods \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{He:2022:DME,
  author =       "Jingyu He and Yao Xiao and Corina Bogdan and Shahin
                 Nazarian and Paul Bogdan",
  title =        "A Design Methodology for Energy-Aware Processing in
                 Unmanned Aerial Vehicles",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "4:1--4:20",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3470451",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3470451",
  abstract =     "Unmanned Aerial Vehicles (UAVs) have rapidly become
                 popular for monitoring, delivery, and actuation in many
                 application domains such as environmental management,
                 disaster mitigation, homeland security, energy,
                 transportation, and manufacturing. However, \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Cui:2022:ILD,
  author =       "Lanlan Cui and Fei Wu and Xiaojian Liu and Meng Zhang
                 and Renzhi Xiao and Changsheng Xie",
  title =        "Improving {LDPC} Decoding Performance for {$3$D TLC
                 NAND} Flash by {LLR} Optimization Scheme for Hard and
                 Soft Decision",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "5:1--5:20",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3473305",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473305",
  abstract =     "Low-density parity-check (LDPC) codes have been widely
                 adopted in NAND flash in recent years to enhance data
                 reliability. There are two types of decoding,
                 hard-decision and soft-decision decoding. However, for
                 the two types, their error correction \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Li:2022:NSI,
  author =       "Bo Li and Guoyong Shi",
  title =        "A Native {SPICE} Implementation of Memristor Models
                 for Simulation of Neuromorphic Analog Signal Processing
                 Circuits",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "6:1--6:24",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3474364",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474364",
  abstract =     "Since the memristor emerged as a programmable analog
                 storage device, it has stimulated research on the
                 design of analog/mixed-signal circuits with the
                 memristor as the enabler of in-memory computation. Due
                 to the difficulty in evaluating the circuit-level
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Poddar:2022:DDM,
  author =       "Sudip Poddar and Sukanta Bhattacharjee and Shao-Yun
                 Fang and Tsung-Yi Ho and B. B. Bhattacharya",
  title =        "Demand-Driven Multi-Target Sample Preparation on
                 Resource-Constrained Digital Microfluidic Biochips",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "7:1--7:21",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3474392",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474392",
  abstract =     "Microfluidic lab-on-chips offer promising technology
                 for the automation of various biochemical laboratory
                 protocols on a minuscule chip. Sample preparation (SP)
                 is an essential part of any biochemical experiments,
                 which aims to produce dilution of a \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2022:FIA,
  author =       "Qiang Liu and Honghui Tang and Peiran Zhang",
  title =        "Fault Injection Attack Emulation Framework for Early
                 Evaluation of {IC} Designs",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "8:1--8:25",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3480962",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3480962",
  abstract =     "Fault injection attack (FIA) has become a serious
                 threat to the confidentiality and fault tolerance of
                 integrated circuits (ICs). Circuit designers need an
                 effective method to evaluate the countermeasures of the
                 IC designs against the FIAs at the design \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Ge:2022:SBN,
  author =       "Mengke Ge and Xiaobing Ni and Xu Qi and Song Chen and
                 Jinglei Huang and Yi Kang and Feng Wu",
  title =        "Synthesizing Brain-network-inspired Interconnections
                 for Large-scale Network-on-chips",
  journal =      j-TODAES,
  volume =       "27",
  number =       "1",
  pages =        "9:1--9:30",
  month =        jan,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3480961",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Jan 7 08:25:44 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3480961",
  abstract =     "Brain network is a large-scale complex network with
                 scale-free, small-world, and modularity properties,
                 which largely supports this high-efficiency massive
                 system. In this article, we propose to synthesize
                 brain-network-inspired interconnections for
                 large-scale \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Alaghi:2022:ISI,
  author =       "Armin Alaghi and Eva Darulova and Andreas Gerstlauer
                 and Phillip Stanley-Marbell",
  title =        "Introduction to the Special Issue on Approximate
                 Systems",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "10:1--10:2",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3488726",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488726",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Bu:2022:TFG,
  author =       "Tiancong Bu and Kaige Yan and Jingweijia Tan",
  title =        "Towards Fine-Grained Online Adaptive Approximation
                 Control for Dense {SLAM} on Embedded {GPUs}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "11:1--11:19",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3486612",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3486612",
  abstract =     "Dense SLAM is an important application on an embedded
                 environment. However, embedded platforms usually fail
                 to provide enough computation resources for
                 high-accuracy real-time dense SLAM, even with
                 high-parallelism architecture such as GPUs. To tackle
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Singh:2022:PFE,
  author =       "Somesh Singh and Tejas Shah and Rupesh Nasre",
  title =        "{ParTBC}: Faster Estimation of Top-{$k$} Betweenness
                 Centrality Vertices on {GPU}",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "12:1--12:25",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3486613",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3486613",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2022:AAF,
  author =       "Liu Liu and Sibren Isaacman and Ulrich Kremer",
  title =        "An Adaptive Application Framework with Customizable
                 Quality Metrics",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "13:1--13:33",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3477428",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477428",
  abstract =     "Many embedded environments require applications to
                 produce outcomes under different, potentially changing,
                 resource constraints. Relaxing application semantics
                 through approximations enables trading off resource
                 usage for outcome quality. Although quality \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Chowdhury:2022:LAH,
  author =       "Prattay Chowdhury and Benjamin Carrion Schafer",
  title =        "Leveraging Automatic High-Level Synthesis Resource
                 Sharing to Maximize Dynamical Voltage Overscaling with
                 Error Control",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "14:1--14:18",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3473909",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473909",
  abstract =     "Approximate Computing has emerged as an alternative
                 way to further reduce the power consumption of
                 integrated circuits (ICs) by trading off errors at the
                 output with simpler, more efficient logic. So far the
                 main approaches in approximate computing have
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Han:2022:DSL,
  author =       "Ming Han and Ye Wang and Jian Dong and Gang Qu",
  title =        "Double-Shift: a Low-Power {DNN} Weights Storage and
                 Access Framework based on Approximate Decomposition and
                 Quantization",
  journal =      j-TODAES,
  volume =       "27",
  number =       "2",
  pages =        "15:1--15:16",
  month =        mar,
  year =         "2022",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3477047",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Feb 17 07:56:21 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477047",
  abstract =     "One major challenge in deploying Deep Neural Network
                 (DNN) in resource-constrained applications, such as
                 edge nodes, mobile embedded systems, and IoT devices,
                 is its high energy cost. The emerging approximate
                 computing methodology can effectively reduce \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Ebrahimi:2022:PCL,
author = "Zahra Ebrahimi and Dennis Klar and Mohammad Aasim
Ekhtiyar and Akash Kumar",
title = "Plasticine: a Cross-layer Approximation Methodology
for Multi-kernel Applications through Minimally Biased,
High-throughput, and Energy-efficient {SIMD} Soft
Multiplier-divider",
journal = j-TODAES,
volume = "27",
number = "2",
pages = "16:1--16:33",
month = mar,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3486616",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 17 07:56:21 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3486616",
abstract = "The rapid evolution of error-resilient programs
intertwined with their quest for high throughput has
motivated the use of Single Instruction, Multiple Data
(SIMD) components in Field-Programmable Gate Arrays
(FPGAs). Particularly, to exploit the error-
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lee:2022:DAA,
author = "Jaechul Lee and C{\'e}dric Killian and Sebastien {Le
Beux} and Daniel Chillet",
title = "Distance-aware Approximate Nanophotonic Interconnect",
journal = j-TODAES,
volume = "27",
number = "2",
pages = "17:1--17:30",
month = mar,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3484309",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 17 07:56:21 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3484309",
abstract = "The energy consumption of manycore architectures is
dominated by data movement, which calls for
energy-efficient and high-bandwidth interconnects. To
overcome the bandwidth limitation of electrical
interconnects, integrated optics appear as a promising
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Angizi:2022:MRN,
author = "Shaahin Angizi and Navid Khoshavi and Andrew Marshall
and Peter Dowben and Deliang Fan",
title = "{MeF-RAM}: a New Non-Volatile Cache Memory Based on
Magneto-Electric {FET}",
journal = j-TODAES,
volume = "27",
number = "2",
pages = "18:1--18:18",
month = mar,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3484222",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 17 07:56:21 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3484222",
abstract = "Magneto-Electric FET (MEFET) is a recently developed
post-CMOS FET, which offers intriguing characteristics
for high-speed and low-power design in both logic and
memory applications. In this article, we present
MeF-RAM, a non-volatile cache memory design \ldots{}",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Shi:2022:CHD,
author = "Xiao Shi and Hao Yan and Qiancun Huang and Chengzhen
Xuan and Lei He and Longxing Shi",
title = "A Compact High-Dimensional Yield Analysis Method using
Low-Rank Tensor Approximation",
journal = j-TODAES,
volume = "27",
number = "2",
pages = "19:1--19:23",
month = mar,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3483941",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Feb 17 07:56:21 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3483941",
abstract = "``Curse of dimensionality'' has become the major
challenge for existing high-sigma yield analysis
methods. In this article, we develop a meta-model using
Low-Rank Tensor Approximation (LRTA) to substitute
expensive SPICE simulation. The polynomial degree of
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Cai:2022:EDL,
author = "Han Cai and Ji Lin and Yujun Lin and Zhijian Liu and
Haotian Tang and Hanrui Wang and Ligeng Zhu and Song
Han",
title = "Enable Deep Learning on Mobile Devices: Methods,
Systems, and Applications",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "20:1--20:50",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3486618",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3486618",
abstract = "Deep neural networks (DNNs) have achieved
unprecedented success in the field of artificial
intelligence (AI), including computer vision, natural
language processing, and speech recognition. However,
their superior performance comes at the considerable
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{S:2022:EEE,
author = "Skandha Deepsita S. and Dhayala Kumar M. and Noor
Mahammad SK",
title = "Energy Efficient Error Resilient Multiplier Using
Low-power Compressors",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "21:1--21:26",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3488837",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3488837",
abstract = "The approximate hardware design can save huge energy
at the cost of errors incurred in the design. This
article proposes the approximate algorithm for
low-power compressors, utilized to build approximate
multiplier with low energy and acceptable error
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Oldja:2022:HSS,
author = "Mari-Liis Oldja and Jangryul Kim and Dowhan Jeong and
Soonhoi Ha",
title = "Hierarchical Scheduling of an {SDF/L} Graph onto
Multiple Processors",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "22:1--22:23",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3489469",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3489469",
abstract = "Although dataflow models are known to thrive at
exploiting task-level parallelism of an application, it
is difficult to exploit the parallelism of data,
represented well with loop structures, since these
structures are not explicitly specified in existing
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2022:UTB,
author = "Si Chen and Guoqi Xie and Renfa Li and Keqin Li",
title = "Uncertainty Theory Based Partitioning for
Cyber-Physical Systems with Uncertain Reliability
Analysis",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "23:1--23:19",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3490177",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3490177",
abstract = "Reasonable partitioning is a critical issue for
cyber-physical system (CPS) design. Traditional CPS
partitioning methods run in a determined context and
depend on the parameter pre-estimations, but they
ignore the uncertainty of parameters and hardly
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Luo:2022:FDF,
author = "Yukui Luo and Shijin Duan and Xiaolin Xu",
title = "{FPGAPRO}: a Defense Framework Against
Crosstalk-Induced Secret Leakage in {FPGA}",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "24:1--24:31",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3491214",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3491214",
abstract = "With the emerging cloud-computing development, FPGAs
are being integrated with cloud servers for higher
performance. Recently, it has been explored to enable
multiple users to share the hardware resources of a
remote FPGA, i.e., to execute their own \ldots{}",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Feng:2022:TTO,
author = "Lang Feng and Jiayi Huang and Jeff Huang and Jiang
Hu",
title = "Toward Taming the Overhead Monster for Data-flow
Integrity",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "25:1--25:24",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3490176",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3490176",
abstract = "Data-Flow Integrity (DFI) is a well-known approach to
effectively detecting a wide range of software attacks.
However, its real-world application has been quite
limited so far because of the prohibitive performance
overhead it incurs. Moreover, the \ldots{}",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Mahalat:2022:ICA,
author = "Mahabub Hasan Mahalat and Suraj Mandal and Anindan
Mondal and Bibhash Sen and Rajat Subhra Chakraborty",
title = "Implementation, Characterization and Application of
Path Changing Switch based Arbiter {PUF} on {FPGA} as a
lightweight Security Primitive for {IoT}",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "26:1--26:26",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3491212",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3491212",
abstract = "Secure authentication of any Internet-of-Things (IoT)
device becomes the utmost necessity due to the lack of
specifically designed IoT standards and intrinsic
vulnerabilities with limited resources and
heterogeneous technologies. Despite the suitability
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Baker:2022:CMA,
author = "Timothy J. Baker and John P. Hayes",
title = "{CeMux}: Maximizing the Accuracy of Stochastic Mux
Adders and an Application to Filter Design",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "27:1--27:26",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3491213",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3491213",
abstract = "Stochastic computing (SC) is a low-cost computational
paradigm that has promising applications in digital
filter design, image processing, and neural networks.
Fundamental to these applications is the weighted
addition operation, which is most often \ldots{}",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Elangovan:2022:ABA,
author = "Reena Elangovan and Shubham Jain and Anand
Raghunathan",
title = "{Ax-BxP}: Approximate Blocked Computation for
Precision-reconfigurable Deep Neural Network
Acceleration",
journal = j-TODAES,
volume = "27",
number = "3",
pages = "28:1--28:20",
month = may,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3492733",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Thu Mar 24 16:05:33 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3492733",
abstract = "Precision scaling has emerged as a popular technique
to optimize the compute and storage requirements of
Deep Neural Networks (DNNs). Efforts toward creating
ultra-low-precision (sub-8-bit) DNNs for efficient
inference suggest that the minimum precision \ldots{}",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pilato:2022:ISS,
author = "Christian Pilato and Zhenman Fang and Yuko Hara-Azumi
and Jim Hwang",
title = "Introduction to the Special Section on High-level
Synthesis for {FPGA}: Next-generation Technologies and
Applications",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "29:1--29:2",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3519279",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3519279",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ramanathan:2022:CPF,
author = "Nadesh Ramanathan and George A. Constantinides and
John Wickerson",
title = "A Case for Precise, Fine-Grained Pointer Synthesis in
High-Level Synthesis",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "30:1--30:26",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3491430",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3491430",
abstract = "This article combines two practical approaches to
improve pointer synthesis within HLS tools. Both
approaches focus on inefficiencies in how HLS tools
treat the points-to graph---a mapping that connects each
instruction to the memory locations that it might
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sun:2022:CMO,
author = "Qi Sun and Tinghuan Chen and Siting Liu and Jianli
Chen and Hao Yu and Bei Yu",
title = "Correlated Multi-objective Multi-fidelity Optimization
for {HLS} Directives Design",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "31:1--31:27",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3503540",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3503540",
abstract = "High-level synthesis (HLS) tools have gained great
attention in recent years because it emancipates
engineers from the complicated and heavy hardware
description language writing and facilitates the
implementations of modern applications (e.g., deep
\ldots{})",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sohrabizadeh:2022:AES,
author = "Atefeh Sohrabizadeh and Cody Hao Yu and Min Gao and
Jason Cong",
title = "{AutoDSE}: Enabling Software Programmers to Design
Efficient {FPGA} Accelerators",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "32:1--32:27",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3494534",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3494534",
abstract = "Adopting FPGA as an accelerator in datacenters is
becoming mainstream for customized computing, but the
fact that FPGAs are hard to program creates a steep
learning curve for software programmers. Even with the
help of high-level synthesis (HLS), \ldots{}",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Gautier:2022:SMO,
author = "Quentin Gautier and Alric Althoff and Christopher L.
Crutchfield and Ryan Kastner",
title = "{Sherlock}: a Multi-Objective Design Space Exploration
Framework",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "33:1--33:20",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3511472",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3511472",
abstract = "Design space exploration (DSE) provides intelligent
methods to tune the large number of optimization
parameters present in modern FPGA high-level synthesis
tools. High-level synthesis parameter tuning is a
time-consuming process due to lengthy hardware
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2022:LPE,
author = "Zi Wang and Benjamin Carrion Schafer",
title = "Learning from the Past: Efficient High-level Synthesis
Design Space Exploration for {FPGAs}",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "34:1--34:23",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3495531",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3495531",
abstract = "The quest to democratize the use of Field-Programmable
Gate Arrays (FPGAs) has given High-Level Synthesis
(HLS) the final push to be widely accepted with FPGA
vendors strongly supporting this VLSI design
methodology to expand the FPGA user base. HLS takes
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sjovall:2022:HLS,
author = "Panu Sj{\"o}vall and Ari Lemmetti and Jarno Vanne and
Sakari Lahti and Timo D. H{\"a}m{\"a}l{\"a}inen",
title = "High-Level Synthesis Implementation of an Embedded
Real-Time {HEVC} Intra Encoder on {FPGA} for Media
Applications",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "35:1--35:34",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3491215",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3491215",
abstract = "High Efficiency Video Coding (HEVC) is the key
enabling technology for numerous modern media
applications. Overcoming its computational complexity
and customizing its rich features for real-time HEVC
encoder implementations, calls for automated design
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2022:LOH,
author = "Yanjiang Liu and Tongzhou Qu and Zibin Dai",
title = "A Low-Overhead and High-Security Cryptographic Circuit
Design Utilizing the {TIGFET}-Based Three-Phase
Single-Rail Pulse Register against Side-Channel
Attacks",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "36:1--36:13",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3498339",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3498339",
abstract = "Side-channel attack (SCA) reveals confidential
information by statistically analyzing physical
manifestations, which is the serious threat to
cryptographic circuits. Various SCA circuit-level
countermeasures have been proposed as fundamental
solutions to \ldots{}",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2022:AHS,
author = "Shanshi Huang and Xiaoyu Sun and Xiaochen Peng and
Hongwu Jiang and Shimeng Yu",
title = "Achieving High In Situ Training Accuracy and Energy
Efficiency with Analog Non-Volatile Synaptic Devices",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "37:1--37:19",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3500929",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3500929",
abstract = "On-device embedded artificial intelligence prefers the
adaptive learning capability when deployed in the
field, and thus in situ training is required. The
compute-in-memory approach, which exploits the analog
computation within the memory array, is a \ldots{}",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Uysal:2022:SCN,
author = "Necati Uysal and Rickard Ewetz",
title = "Synthesis of Clock Networks with a Mode-Reconfigurable
Topology",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "38:1--38:22",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3503538",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3503538",
abstract = "Modern digital circuits are often required to operate
in multiple modes to cater to variable frequency and
power requirements. Consequently, the clock networks
for such circuits must be synthesized, meeting
different timing constraints in different \ldots{}",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Handique:2022:FLS,
author = "Mousum Handique and Jantindra Kumar Deka and Santosh
Biswas",
title = "Fault Localization Scheme for Missing Gate Faults in
Reversible Circuits",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "39:1--39:29",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3503539",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3503539",
abstract = "This article introduces a fault localization method to
extract the exact location of single and multiple
missing gate faults in reversible \( k \)-CNOT-based
circuits. The primary target of the proposed method is
to obtain the complete test set for \ldots{}",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Guo:2022:SSD,
author = "Wenzhong Guo and Sihuang Lian and Chen Dong and Zhenyi
Chen and Xing Huang",
title = "A Survey on Security of Digital Microfluidic Biochips:
Technology, Attack, and Defense",
journal = j-TODAES,
volume = "27",
number = "4",
pages = "40:1--40:33",
month = jul,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3494697",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 25 08:20:01 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3494697",
abstract = "As an emerging lab-on-a-chip technology platform,
digital microfluidic biochips (DMFBs) have been widely
used for executing various laboratory procedures in
biochemistry and biomedicine such as gene sequencing
and near-patient diagnosis, with the \ldots{}",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chandra:2022:ISS,
author = "Vikas Chandra and Yiran Chen and Sungjoo Yoo",
title = "Introduction to the Special Section on
Energy-Efficient {AI} Chips",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "41:1--41:2",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3538502",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3538502",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lee:2022:MEC,
author = "Sunjung Lee and Jaewan Choi and Wonkyung Jung and
Byeongho Kim and Jaehyun Park and Hweesoo Kim and Jung
Ho Ahn",
title = "{MVP}: an Efficient {CNN} Accelerator with Matrix,
Vector, and Processing-Near-Memory Units",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "42:1--42:25",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3497745",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3497745",
abstract = "Mobile and edge devices become common platforms for
inferring convolutional neural networks (CNNs) due to
superior privacy and service quality. To reduce the
computational costs of convolution (CONV), recent CNN
models adopt depth-wise CONV (DW-CONV) and \ldots{}",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Cicek:2022:EEB,
author = "Nihat Mert Cicek and Xipeng Shen and Ozcan Ozturk",
title = "Energy Efficient Boosting of {GEMM} Accelerators for
{DNN} via Reuse",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "43:1--43:26",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3503469",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3503469",
abstract = "Reuse-centric convolutional neural networks (CNN)
acceleration speeds up CNN inference by reusing
computations for similar neuron vectors in CNN's input
layer or activation maps. This new paradigm of
optimizations is, however, largely limited by the
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2022:EEL,
author = "Zhe Chen and Hugh T. Blair and Jason Cong",
title = "Energy-Efficient {LSTM} Inference Accelerator for
Real-Time Causal Prediction",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "44:1--44:19",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3495006",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3495006",
abstract = "Ever-growing edge applications often require short
processing latency and high energy efficiency to meet
strict timing and power budget. In this work, we
propose that the compact long short-term memory (LSTM)
model can approximate conventional acausal \ldots{}",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Shiri:2022:EEE,
author = "Aidin Shiri and Uttej Kallakuri and Hasib-Al Rashid
and Bharat Prakash and Nicholas R. Waytowich and Tim
Oates and Tinoosh Mohsenin",
title = "{E2HRL}: an Energy-efficient Hardware Accelerator for
Hierarchical Deep Reinforcement Learning",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "45:1--45:19",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3498327",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3498327",
abstract = "Recently, Reinforcement Learning (RL) has shown great
performance in solving sequential decision-making and
control in dynamic environment problems. Despite its
achievements, deploying Deep Neural Network (DNN)-based
RL is expensive in terms of time and \ldots{}",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Laubeuf:2022:DQR,
author = "Nathan Laubeuf and Jonas Doevenspeck and Ioannis A.
Papistas and Michele Caselli and Stefan Cosemans and
Peter Vrancx and Debjyoti Bhattacharjee and Arindam
Mallik and Peter Debacker and Diederik Verkest and
Francky Catthoor and Rudy Lauwereins",
title = "Dynamic Quantization Range Control for
Analog-in-Memory Neural Networks Acceleration",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "46:1--46:21",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3498328",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3498328",
abstract = "Analog in Memory Computing (AiMC) based neural network
acceleration is a promising solution to increase the
energy efficiency of deep neural networks deployment.
However, the quantization requirements of these analog
systems are not compatible with state- \ldots{}",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Gong:2022:AMB,
author = "Yifan Gong and Geng Yuan and Zheng Zhan and Wei Niu
and Zhengang Li and Pu Zhao and Yuxuan Cai and Sijia
Liu and Bin Ren and Xue Lin and Xulong Tang and Yanzhi
Wang",
title = "Automatic Mapping of the Best-Suited {DNN} Pruning
Schemes for Real-Time Mobile Acceleration",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "47:1--47:26",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3495532",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3495532",
abstract = "Weight pruning is an effective model compression
technique to tackle the challenges of achieving
real-time deep neural network (DNN) inference on mobile
devices. However, prior pruning schemes have limited
application scenarios due to accuracy degradation,
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lee:2022:ION,
author = "Jooyeon Lee and Junsang Park and Seunghyun Lee and
Jaeha Kung",
title = "Implication of Optimizing {NPU} Dataflows on Neural
Architecture Search for Mobile Devices",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "48:1--48:24",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3513085",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3513085",
abstract = "Recent advances in deep learning have made it possible
to implement artificial intelligence in mobile devices.
Many studies have put a lot of effort into developing
lightweight deep learning models optimized for mobile
devices. To overcome the performance \ldots{}",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Tang:2022:ETE,
author = "Yue Tang and Xinyi Zhang and Peipei Zhou and Jingtong
Hu",
title = "{EF-Train}: Enable Efficient On-device {CNN} Training
on {FPGA} through Data Reshaping for Online Adaptation
or Personalization",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "49:1--49:36",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3505633",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3505633",
abstract = "Conventionally, DNN models are trained once in the
cloud and deployed in edge devices such as cars,
robots, or unmanned aerial vehicles (UAVs) for
real-time inference. However, there are many cases that
require the models to adapt to new environments,
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2022:DDN,
author = "Chaojian Li and Wuyang Chen and Yuchen Gu and Tianlong
Chen and Yonggan Fu and Zhangyang Wang and Yingyan
Lin",
title = "{DANCE}: {DAta-Network Co-optimization for Efficient}
Segmentation Model Training and Inference",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "50:1--50:20",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3510835",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3510835",
abstract = "Semantic segmentation for scene understanding is
nowadays widely demanded, raising significant
challenges for the algorithm efficiency, especially its
applications on resource-limited platforms. Current
segmentation models are trained and evaluated on
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Kee:2022:LPP,
author = "Minkwan Kee and Gi-Ho Park",
title = "A Low-power Programmable Machine Learning Hardware
Accelerator Design for Intelligent Edge Devices",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "51:1--51:13",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3531479",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3531479",
abstract = "With the advent of the machine learning and IoT, many
low-power edge devices, such as wearable devices with
various sensors, are used for machine learning-based
intelligent applications, such as healthcare or motion
recognition. While these applications \ldots{}",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wen:2022:MCT,
author = "Chenyi Wen and Xiao Dong and Baixin Chen and
Umamaheswara Rao Tida and Yiyu Shi and Cheng Zhuo",
title = "Magnetic Core {TSV}-Inductor Design and Optimization
for On-chip {DC-DC} Converter",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "52:1--52:23",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3507700",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3507700",
abstract = "The conventional on-chip spiral inductor consumes a
significant top-metal routing area, thereby preventing
its popularity in many on-chip applications. Recently
through-silicon-via- (TSV) based inductor (also known
as a TSV-inductor) with a magnetic core \ldots{}",
acknowledgement = ack-nhfb,
articleno = "52",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Dewan:2022:DAA,
author = "Monzurul Islam Dewan and Dae Hyun Kim",
title = "Design Automation Algorithms for the {NP}-Separate
{VLSI} Design Methodology",
journal = j-TODAES,
volume = "27",
number = "5",
pages = "53:1--53:20",
month = sep,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3508375",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Sep 28 11:01:08 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3508375",
abstract = "The NP-Separate design methodology for
very-large-scale integration (VLSI) design
fine-controls the sizes of transistors, thereby
achieving significant power, performance, and area
improvement compared to the conventional
standard-cell-based design \ldots{}",
acknowledgement = ack-nhfb,
articleno = "53",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pomeranz:2022:IFC,
author = "Irith Pomeranz",
title = "Increasing the Fault Coverage of a Truncated Test
Set",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "54:1--54:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3508459",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3508459",
abstract = "Defect-aware, cell-aware, and gate-exhaustive faults
are described by input patterns of subcircuits or cells
that are expected to activate defects. Even with
single-cycle faults, an $n$-input subcircuit can
have up to $ 2^n $ faults with unique \ldots{}",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Jagadheesh:2022:NAM,
author = "Samala Jagadheesh and P. Veda Bhanu and Soumya J.",
title = "{NoC} Application Mapping Optimization Using
Reinforcement Learning",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "55:1--55:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3510381",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3510381",
abstract = "Application mapping is one of the early stage design
processes aimed to improve the performance of
Network-on-Chip. Mapping is an NP-hard problem. A
massive amount of high-quality supervised data is
required to solve the application mapping problem using
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "55",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Kolhe:2022:BDS,
author = "Gaurav Kolhe and Tyler David Sheaves and Sai Manoj
{P. D.} and Hamid Mahmoodi and Setareh Rafatirad and Avesta
Sasan and Houman Homayoun",
title = "Breaking the Design and Security Trade-off of
Look-up-table-based Obfuscation",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "56:1--56:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3510421",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3510421",
abstract = "Logic locking and Integrated Circuit (IC) camouflaging
are the most prevalent protection schemes that can
thwart most hardware security threats. However, the
state-of-the-art attacks, including Boolean
Satisfiability (SAT) and approximation-based attacks,
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "56",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2022:NAD,
author = "Taozhong Li and Naifeng Jing and Jianfei Jiang and Qin
Wang and Zhigang Mao and Yiran Chen",
title = "A Novel Architecture Design for Output Significance
Aligned Flow with Adaptive Control in {ReRAM}-based
Neural Network Accelerator",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "57:1--57:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3510819",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3510819",
abstract = "Resistive-RAM-based (ReRAM-based) computing shows
great potential on accelerating DNN inference by its
highly parallel structure. Regrettably, computing
accuracy in practical is much lower than expected due
to the non-ideal ReRAM device. Conventional \ldots{}",
acknowledgement = ack-nhfb,
articleno = "57",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Brunner:2022:THR,
author = "Michaela Brunner and Alexander Hepp and Johanna Baehr
and Georg Sigl",
title = "Toward a Human-Readable State Machine Extraction",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "58:1--58:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3513086",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3513086",
abstract = "The target of sequential reverse engineering is to
extract the state machine of a design. Sequential
reverse engineering of a gate-level netlist consists of
the identification of so-called state flip-flops
(sFFs), as well as the extraction of the state
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "58",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhou:2022:QCT,
author = "Xiangzhen Zhou and Yuan Feng and Sanjiang Li",
title = "Quantum Circuit Transformation: a {Monte Carlo} Tree
Search Framework",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "59:1--59:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3514239",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3514239",
abstract = "In the noisy intermediate-scale quantum era, quantum
processing units suffer from, among others, highly
limited connectivity between physical qubits. To make a
quantum circuit effectively executable, a circuit
transformation process is necessary to \ldots{}",
acknowledgement = ack-nhfb,
articleno = "59",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Hong:2022:TNB,
author = "Xin Hong and Xiangzhen Zhou and Sanjiang Li and Yuan
Feng and Mingsheng Ying",
title = "A Tensor Network based Decision Diagram for
Representation of Quantum Circuits",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "60:1--60:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3514355",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3514355",
abstract = "Tensor networks have been successfully applied in
simulation of quantum physical systems for decades.
Recently, they have also been employed in classical
simulation of quantum computing, in particular, random
quantum circuits. This article proposes a \ldots{}",
acknowledgement = ack-nhfb,
articleno = "60",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Choudhury:2022:SHC,
author = "Dwaipayan Choudhury and Reet Barik and Aravind
Sukumaran Rajam and Ananth Kalyanaraman and Partha
Pratim Pande",
title = "Software\slash Hardware Co-design of {$3$D}
{NoC}-based {GPU} Architectures for Accelerated Graph
Computations",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "61:1--61:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3514354",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3514354",
abstract = "Manycore GPU architectures have become the mainstay
for accelerating graph computations. One of the primary
bottlenecks to performance of graph computations on
manycore architectures is the data movement. Since most
of the accesses in graph processing are \ldots{}",
acknowledgement = ack-nhfb,
articleno = "61",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Jiang:2022:ELH,
author = "Yiyang Jiang and Fan Yang and Bei Yu and Dian Zhou and
Xuan Zeng",
title = "Efficient Layout Hotspot Detection via Neural
Architecture Search",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "62:1--62:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3517130",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3517130",
abstract = "Layout hotspot detection is of great importance in the
physical verification flow. Deep neural network models
have been applied to hotspot detection and achieved
great success. Despite their success, high-performance
neural networks are still quite \ldots{}",
acknowledgement = ack-nhfb,
articleno = "62",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Abel:2022:FSS,
author = "Inga Abel and Helmut Graeb",
title = "{FUBOCO}: Structure Synthesis of Basic Op-Amps by
{FUnctional BlOck COmposition}",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "63:1--63:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3522738",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3522738",
abstract = "This article presents a method to automatically
synthesize the structure and initial sizing of an
operational amplifier. It is positioned between
approaches with fixed design plans and a small search
space of structures and approaches with generic
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "63",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sha:2022:DMB,
author = "Zhibing Sha and Jun Li and Zhigang Cai and Min Huang
and Jianwei Liao and Francois Trahay",
title = "Degraded Mode-benefited {I/O} Scheduling to Ensure
{I/O} Responsiveness in {RAID}-enabled {SSDs}",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "64:1--64:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3522755",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3522755",
abstract = "RAID-enabled SSDs commonly have unbalanced I/O
workloads on their components (e.g., SSD channels), as
the data/parity chunks in the same stripe may have
varied access frequency, which greatly impacts I/O
responsiveness. This article proposes an I/O \ldots{}",
acknowledgement = ack-nhfb,
articleno = "64",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Bai:2022:RER,
author = "Yunkai Bai and Andrew Stern and Jungmin Park and Mark
Tehranipoor and Domenic Forte",
title = "{RASCv2}: Enabling Remote Access to Side-Channels for
Mission Critical and {IoT} Systems",
journal = j-TODAES,
volume = "27",
number = "6",
pages = "65:1--65:??",
month = nov,
year = "2022",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3524123",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 25 09:11:49 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3524123",
abstract = "The Internet of Things (IoT) and smart devices are
currently being deployed in systems such as autonomous
vehicles and medical monitoring devices. The
introduction of IoT devices into these systems enables
network connectivity for data transfer, cloud
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "65",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Hung:2023:DDR,
author = "Jos{\'e} Romero Hung and Chao Li and Taolei Wang and
Jinyang Guo and Pengyu Wang and Chuanming Shao and Jing
Wang and Guoyong Shi and Xiangwen Liu and Hanqing Wu",
title = "{DRAGON}: Dynamic Recurrent Accelerator for Graph
Online Convolution",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3524124",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3524124",
abstract = "Despite the extraordinary applicative potentiality
that dynamic graph inference may entail, its
practical-physical implementation has been a topic
seldom explored in literature. Although graph inference
through neural networks has received plenty of
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Minakova:2023:MTT,
author = "Svetlana Minakova and Todor Stefanov",
title = "Memory-Throughput Trade-off for {CNN}-Based
Applications at the Edge",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3527457",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3527457",
abstract = "Many modern applications require execution of
Convolutional Neural Networks (CNNs) on edge devices,
such as mobile phones or embedded platforms. This can
be challenging, as the state-of-the-art CNNs are memory
costly, whereas the memory budget of edge \ldots{}",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chhabria:2023:EDN,
author = "Vidya A. Chhabria and Vipul Ahuja and Ashwath Prabhu
and Nikhil Patil and Palkesh Jain and Sachin S.
Sapatnekar",
title = "Encoder-Decoder Networks for Analyzing Thermal and
Power Delivery Networks",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3526115",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3526115",
abstract = "Power delivery network (PDN) analysis and thermal
analysis are computationally expensive tasks that are
essential for successful integrated circuit (IC)
design. Algorithmically, both these analyses have
similar computational structure and complexity as
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Spieck:2023:LBM,
author = "Jan Spieck and Stefan Wildermann and J{\"u}rgen
Teich",
title = "A Learning-based Methodology for Scenario-aware
Mapping of Soft Real-time Applications onto
Heterogeneous {MPSoCs}",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3529230",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3529230",
abstract = "Soft real-time streaming applications often process
input data that evoke varying workloads for their
tasks. This may lead to high energy consumption or
deadline misses in case their mapping onto a
heterogeneous MPSoC target architecture is not adapted,
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2023:EES,
author = "Chunqiao Li and Chengtao An and Fan Yang and Xuan
Zeng",
title = "{ESPSim}: an Efficient Scalable Power Grid Simulator
Based on Parallel Algebraic Multigrid",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3529533",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3529533",
abstract = "Fast verification for the extremely large-scale power
grid is demanding as CMOS technology advances
consistently. In this work, we propose ESPSim, an
efficient scalable power grid simulator based on a
parallel smoothed aggregation-based algebraic
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2023:RRB,
author = "Chenglong Huang and Nuo Xu and Junwei Zeng and Wenqing
Wang and Yihong Hu and Liang Fang and Desheng Ma and
Yanting Chen",
title = "Rescuing {ReRAM}-based Neural Computing Systems from
Device Variation",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3533706",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3533706",
abstract = "Resistive random-access memory (ReRAM)-based crossbar
array (RCA) is a promising platform to accelerate
vector-matrix multiplication in deep neural networks
(DNNs). There are, however, some practical issues,
especially device variation, that hinder the \ldots{}",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ding:2023:MAP,
author = "Bo Ding and Jinglei Huang and Qi Xu and Junpeng Wang
and Song Chen and Yi Kang",
title = "Memory-aware Partitioning, Scheduling, and
Floorplanning for Partially Dynamically Reconfigurable
Systems",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3534968",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3534968",
abstract = "Partially dynamic reconfiguration (PDR) technology can
accelerate the reconfiguration process and overcome
hardware resource constraints when facing the challenge
of high performance with respect to applications and
resources constraints on field-programmable \ldots{}",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zeng:2023:AMM,
author = "Junwei Zeng and Nuo Xu and Yabo Chen and Chenglong
Huang and Zhiwei Li and Liang Fang",
title = "{AIMCU-MESO}: an In-Memory Computing Unit Constructed
by {MESO} Device",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3539575",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3539575",
abstract = "Traditional CMOS-based von-Neumann computer
architecture faces the issue of memory wall that the
limitation of bus-bandwidth and the speed mismatch
between processor and memory restrict the efficiency of
data processing along with an irreducible energy
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Das:2023:CCV,
author = "Sourav Das and Sayandeep Sanyal and Aritra Hazra and
Pallab Dasgupta",
title = "{CoVerPlan}: a Comprehensive Verification Planning
Framework Leveraging {PSS} Specifications",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3543175",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3543175",
abstract = "With increasing design complexity, the portability of
tests across different designs and platforms becomes a
key criterion for accelerating verification closure.
The Portable Test and Stimulus Standard (PSS) is an
emerging industry standard prepared by \ldots{}",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Song:2023:VEE,
author = "Zhuoran Song and Naifeng Jing and Xiaoyao Liang",
title = "{E$^2$-VOR}: an End-to-End En\slash Decoder
Architecture for Efficient Video Object Recognition",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "10:1--10:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3543852",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3543852",
abstract = "High-resolution video object recognition (VOR) evolves
so fast but is very compute-intensive. This is because
VOR leverages compute-intensive deep neural network
(DNN) for better accuracy. Although many works have
been proposed for speedup, they mostly \ldots{}",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhao:2023:MSF,
author = "Zhiqiang Zhao and Zhuo Feng",
title = "A Multilevel Spectral Framework for Scalable
Vectorless Power\slash Thermal Integrity Verification",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "11:1--11:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3529534",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3529534",
abstract = "Vectorless integrity verification is becoming
increasingly critical to the robust design of nanoscale
integrated circuits. This article introduces a general
vectorless integrity verification framework that allows
computing the worst-case voltage drops or \ldots{}",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2023:SDP,
author = "Kai Huang and Bowen Li and Dongliang Xiong and Haitian
Jiang and Xiaowen Jiang and Xiaolang Yan and Luc
Claesen and Dehong Liu and Junjian Chen and Zhili Liu",
title = "Structured Dynamic Precision for Deep Neural Networks
Quantization",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "12:1--12:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3549535",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3549535",
abstract = "Deep Neural Networks (DNNs) have achieved remarkable
success in various Artificial Intelligence
applications. Quantization is a critical step in DNNs
compression and acceleration for deployment. To further
boost DNN execution efficiency, many works \ldots{}",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ebrahimi-Azandaryani:2023:ACA,
author = "Farhad Ebrahimi-Azandaryani and Omid Akbari and Mehdi
Kamal and Ali Afzali-Kusha and Massoud Pedram",
title = "Accuracy Configurable Adders with Negligible Delay
Overhead in Exact Operating Mode",
journal = j-TODAES,
volume = "28",
number = "1",
pages = "13:1--13:??",
month = jan,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3549936",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:22 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3549936",
abstract = "In this paper, two accuracy configurable adders
capable of operating in approximate and exact modes are
proposed. In the adders, which include a block-based
carry propagate and a parallel prefix structure, the
carry chains are cut off in the approximate \ldots{}",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lin:2023:ISI,
author = "Yibo Lin and Avi Ziv and Haoxing Ren",
title = "Introduction to the Special Issue on Machine Learning
for {CAD\slash EDA}",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "14:1--14:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3586208",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3586208",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sanchez:2023:CSE,
author = "Daniela S{\'a}nchez and Lorenzo Servadei and Gamze Naz
Kiprit and Robert Wille and Wolfgang Ecker",
title = "A Comprehensive Survey on Electronic Design Automation
and Graph Neural Networks: Theory and Applications",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "15:1--15:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3543853",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3543853",
abstract = "Driven by Moore's law, the chip design complexity is
steadily increasing. Electronic Design Automation (EDA)
has been able to cope with the challenging very
large-scale integration process, assuring scalability,
reliability, and proper time-to-market. \ldots{}",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Koblah:2023:SPA,
author = "David Koblah and Rabin Acharya and Daniel Capecci and
Olivia Dizon-Paradis and Shahin Tajik and Fatemeh Ganji
and Damon Woodard and Domenic Forte",
title = "A Survey and Perspective on Artificial Intelligence
for Security-Aware Electronic Design Automation",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "16:1--16:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3563391",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3563391",
abstract = "Artificial intelligence (AI) and machine learning (ML)
techniques have been increasingly used in several
fields to improve performance and the level of
automation. In recent years, this use has exponentially
increased due to the advancement of high-\ldots{}",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Fan:2023:PCC,
author = "Shaoze Fan and Shun Zhang and Jianbo Liu and Ningyuan
Cao and Xiaoxiao Guo and Jing Li and Xin Zhang",
title = "Power Converter Circuit Design Automation Using
Parallel {Monte Carlo} Tree Search",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "17:1--17:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3549538",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3549538",
abstract = "The tidal waves of modern electronic/electrical
devices have led to increasing demands for ubiquitous
application-specific power converters. A conventional
manual design procedure of such power converters is
computation- and labor-intensive, which \ldots{}",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Song:2023:MLA,
author = "Ling-Yen Song and Chih-Yun Chou and Tung-Chieh Kuo and
Chien-Nan Liu and Juinn-Dar Huang",
title = "Machine Learning Assisted Circuit Sizing Approach for
Low-Voltage Analog Circuits with Efficient
Variation-Aware Optimization",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "18:1--18:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3567422",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3567422",
abstract = "Low-power analog design is a hot topic for various
power efficient applications. Sizing low-power analog
circuits is not easy because the increasing
uncertainties from low-voltage techniques magnify
process variation effects on the design yield.
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2023:PDW,
author = "Yaguang Li and Yishuang Lin and Meghna Madhusudan and
Arvind Sharma and Sachin Sapatnekar and Ramesh Harjani
and Jiang Hu",
title = "Performance-driven Wire Sizing for Analog Integrated
Circuits",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "19:1--19:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3559542",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3559542",
abstract = "Analog IC performance has a strong dependence on
interconnect RC parasitics, which are significantly
affected by wire sizes in recent technologies, where
minimum-width wires have high resistance. However,
performance-driven wire sizing for analog ICs has
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Cheng:2023:MLD,
author = "Jiawen Cheng and Yong Xiao and Yun Shao and Guanghai
Dong and Songlin Lyu and Wenjian Yu",
title = "Machine-learning-driven Architectural Selection of
Adders and Multipliers in Logic Synthesis",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "20:1--20:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3560712",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3560712",
abstract = "Designing high-performance adders and multiplier
components for diverse specifications and constraints
is of practical concern. However, selecting the best
architecture for adder or multiplier, which largely
affects the performance of synthesized circuits,
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2023:GFG,
author = "Yiting Liu and Ziyi Ju and Zhengming Li and Mingzhi
Dong and Hai Zhou and Jia Wang and Fan Yang and Xuan
Zeng and Li Shang",
title = "{GraphPlanner}: Floorplanning with Graph Neural
Network",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "21:1--21:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3555804",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3555804",
abstract = "Chip floorplanning has long been a critical task with
high computation complexity in the physical
implementation of VLSI chips. Its key objective is to
determine the initial locations of large chip modules
with minimized wirelength while adhering to the
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Fang:2023:ETC,
author = "Chenlei Fang and Qicheng Huang and Zeye Liu and
Ruizhou Ding and Ronald D. Blanton",
title = "Efficient Test Chip Design via Smart Computation",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "22:1--22:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3558393",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3558393",
abstract = "Submitted to the Special Issue on Machine Learning for
CAD (ML-CAD). Competitive strength in semiconductor
field depends on yield. The challenges associated with
designing and manufacturing of leading-edge integrated
circuits (ICs) have increased that \ldots{}",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lozano:2023:LBP,
author = "Erika Susana Alcorta Lozano and Andreas Gerstlauer",
title = "Learning-based Phase-aware Multi-core {CPU} Workload
Forecasting",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "23:1--23:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3564929",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3564929",
abstract = "Predicting workload behavior during workload execution
is essential for dynamic resource optimization in
multi-processor systems. Recent studies have proposed
advanced machine learning techniques for dynamic
workload prediction. Workload prediction can be
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2023:MLB,
author = "Benzheng Li and Xi Zhang and Hailong You and Zhongdong
Qi and Yuming Zhang",
title = "Machine Learning Based Framework for Fast Resource
Estimation of {RTL} Designs Targeting {FPGAs}",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "24:1--24:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3555047",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3555047",
abstract = "Field-programmable gate arrays (FPGAs) have grown to
be an important platform for integrated circuit design
and hardware emulation. However, with the dramatic
increase in design scale, it has become a key challenge
to partition very large scale \ldots{}",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ferretti:2023:GNN,
author = "Lorenzo Ferretti and Andrea Cini and Georgios
Zacharopoulos and Cesare Alippi and Laura Pozzi",
title = "Graph Neural Networks for High-Level Synthesis Design
Space Exploration",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "25:1--25:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3570925",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3570925",
abstract = "High-level Synthesis (HLS) Design-Space Exploration
(DSE) aims at identifying Pareto-optimal synthesis
configurations whose exhaustive search is unfeasible
due to the design-space dimensionality and the
prohibitive computational cost of the synthesis
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Last:2023:TPM,
author = "Felix Last and Ulf Schlichtmann",
title = "Training {PPA} Models for Embedded Memories on a
Low-data Diet",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "26:1--26:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3556539",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3556539",
abstract = "Supervised machine learning requires large amounts of
labeled data for training. In power, performance, and
area (PPA) estimation of embedded memories, every new
memory compiler version is considered independently of
previous compiler versions. Since the \ldots{}",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Xing:2023:BPB,
author = "Wei W. Xing and Xiang Jin and Tian Feng and Dan Niu
and Weisheng Zhao and Zhou Jin",
title = "{BoA-PTA}: a {Bayesian Optimization Accelerated PTA}
Solver for {SPICE} Simulation",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "27:1--27:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3555805",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3555805",
abstract = "One of the greatest challenges in integrated circuit
design is the repeated executions of computationally
expensive SPICE simulations, particularly when highly
complex chip testing/verification is involved.
Recently, pseudo-transient analysis (PTA) has
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Dai:2023:SAD,
author = "Ruochen Dai and Tuba Yavuz",
title = "A Symbolic Approach to Detecting Hardware {Trojans}
Triggered by Don't Care Transitions",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "28:1--28:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3558392",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3558392",
abstract = "Due to the globalization of Integrated Circuit supply
chain, hardware Trojans and the attacks that can
trigger them have become an important security issue.
One type of hardware Trojans leverages the ``don't care
transitions'' in Finite-state Machines (FSMs).
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2023:AMC,
author = "Zhisheng Chen and Wenzhong Guo and Genggeng Liu and
Xing Huang",
title = "Application Mapping and Control-system Design for
Microfluidic Biochips with Distributed Channel
Storage",
journal = j-TODAES,
volume = "28",
number = "2",
pages = "29:1--29:??",
month = mar,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3564288",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed Apr 5 10:07:23 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3564288",
abstract = "Continuous-flow microfluidic biochips have emerged as
a potential low-cost and fast-responsive lab-on-chip
platform. They have attracted much attention due to
their capability of performing various biochemical
applications concurrently and automatically \ldots{}",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Choudhury:2023:AGC,
author = "Dwaipayan Choudhury and Lizhi Xiang and Aravind Rajam
and Anantharaman Kalyanaraman and Partha Pratim Pande",
title = "Accelerating Graph Computations on {$3$D}
{NoC}-Enabled {PIM} Architectures",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "30:1--30:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3564290",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3564290",
abstract = "Graph application workloads are dominated by random
memory accesses with the poor locality. To tackle the
irregular and sparse nature of computation, ReRAM-based
Processing-in-Memory (PIM) architectures have been
proposed recently. Most of these ReRAM \ldots{}",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lee:2023:VEL,
author = "Jayoung Lee and Pengcheng Wang and Ran Xu and Sarthak
Jain and Venkat Dasari and Noah Weston and Yin Li and
Saurabh Bagchi and Somali Chaterji",
title = "Virtuoso: Energy- and Latency-aware Streamlining of
Streaming Videos on Systems-on-Chips",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "31:1--31:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3564289",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3564289",
abstract = "Efficient and adaptive computer vision systems have
been proposed to make computer vision tasks, such as
image classification and object detection, optimized
for embedded or mobile devices. These solutions, quite
recent in their origin, focus on \ldots{}",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Bommana:2023:DST,
author = "Ashish Reddy Bommana and Susheel Ujwal Siddamshetty
and Dhilleswararao Pudi and Arvind Thumatti K. R. and
Srinivas Boppu and M. Sabarimalai Manikandan and Linga
Reddy Cenkeramaddi",
title = "Design of Synthesis-time Vectorized Arithmetic
Hardware for Tapered Floating-point Addition and
Subtraction",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "32:1--32:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3567423",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3567423",
abstract = "Energy efficiency has become the new performance
criterion in this era of pervasive embedded computing;
thus, accelerator-rich multi-processor system-on-chips
are commonly used in embedded computing hardware. Once
computationally intensive machine learning applications
gained much traction, they are now deployed in many
application domains due to abundant and cheaply
available computational capacity. In addition, there is
a growing trend toward developing hardware accelerators
for machine learning applications for embedded edge
devices where performance and energy efficiency are
critical. Although these hardware accelerators
frequently use floating-point operations for accuracy,
reduced-width floating-point formats are also used to
reduce hardware complexity; thus, power consumption
while maintaining accuracy. Vectorization concepts can
also be used to improve performance, energy efficiency,
and memory bandwidth. We propose the design of a
vectorized floating-point adder/subtractor that
supports arbitrary length floating-point formats with
varying exponent and mantissa widths in this article.
In comparison to existing designs in the literature,
the proposed design is 2.57$ \times $ area- and 1.56$
\times $ power-efficient, and it supports true
vectorization with no restrictions on exponent and
mantissa widths.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Yang:2023:ATF,
author = "Chun-Chieh Yang and Yi-Ru Chen and Hui-Hsin Liao and
Yuan-Ming Chang and Jenq-Kuen Lee",
title = "Auto-tuning Fixed-point Precision with {TVM} on
{RISC-V} Packed {SIMD} Extension",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "33:1--33:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3569939",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3569939",
abstract = "Today, as deep learning (DL) is applied more often in
daily life, dedicated processors such as CPUs and GPUs
have become very important for accelerating model
executions. With the growth of technology, people are
becoming accustomed to using edge devices, such as
mobile phones, smart watches, and VR devices in their
daily lives. A variety of technologies using DL are
gradually being applied to these edge devices. However,
there is a large number of computations in DL. It faces
a challenging problem how to provide solutions in the
edge devices. In this article, the proposed method
enables a flow with the RISC-V Packed extension (P
extension) in TVM. TVM, an open deep learning compiler
for neural network models, is growing as a key
infrastructure for DL computing. RISC-V is an open
instruction set architecture (ISA) with customized and
flexible features. The Packed-SIMD extension is a
RISC-V extension that enables subword
single-instruction multiple-data (SIMD) computations in
RISC-V architectures to support fallback engines in AI
computing. In the proposed flow, a fixed-point type
that is supported by an integer of 16-bit type and
saturation instructions is added to replace the
original 32-bit float type. In addition, an auto-tuning
method is proposed to use a uniform selector mechanism
(USM) to find the binary point position for fixed-point
type use. The tensorization feature of TVM can be used
to optimize specific hardware such as subword SIMD
instructions with RISC-V P extension. With our
experiment on the Spike simulator, the proposed method
with the USM can improve performance by approximately
2.54 to 6.15$ \times $ in terms of instruction counts
with little accuracy loss.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2023:HAQ,
author = "Shanshi Huang and Hongwu Jiang and Shimeng Yu",
title = "Hardware-aware Quantization\slash Mapping Strategies
for Compute-in-Memory Accelerators",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "34:1--34:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3569940",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3569940",
abstract = "The emerging non-volatile memory (eNVM) based
mixed-signal Compute-in-Memory (CIM) accelerators are
of great interest in today's AI accelerators design due
to their high energy efficiency. Various CIM
architectures and circuit-level designs have been
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Feng:2023:GGA,
author = "Lang Feng and Wenjian Liu and Chuliang Guo and Ke Tang
and Cheng Zhuo and Zhongfeng Wang",
title = "{GANDSE}: Generative Adversarial Network-based Design
Space Exploration for Neural Network Accelerator
Design",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "35:1--35:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3570926",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3570926",
abstract = "With the popularity of deep learning, the hardware
implementation platform of deep learning has received
increasing interest. Unlike the general purpose
devices, e.g., CPU or GPU, where the deep learning
algorithms are executed at the software level,
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2023:DDD,
author = "Junpeng Wang and Haitao Du and Bo Ding and Qi Xu and
Song Chen and Yi Kang",
title = "{DDAM}: Data Distribution-Aware Mapping of {CNNs} on
Processing-In-Memory Systems",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "36:1--36:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3576196",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3576196",
abstract = "Convolution neural networks (CNNs) are widely used
algorithms in image processing, natural language
processing and many other fields. The large amount of
memory access of CNNs is one of the major concerns in
CNN accelerator designs that influences the \ldots{}",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Rawat:2023:SNB,
author = "Bhawna Rawat and Poornima Mittal",
title = "A Switching {NMOS} Based Single Ended Sense Amplifier
for High Density {SRAM} Applications",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "37:1--37:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3576198",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3576198",
abstract = "The demand for single ended static random access
memory is growing, driven by the decreasing technology
node and increasing processing load. This mandates the
need for a single ended sense amplifier to be used
along with the memory. Consequently, a single
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pereira:2023:IED,
author = "Danny Pereira and Anirban Ghose and Sumana Ghosh and
Soumyajit Dey",
title = "Inferencing on Edge Devices: a Time- and Space-aware
Co-scheduling Approach",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "38:1--38:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3576197",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3576197",
abstract = "Neural Network (NN)-based real-time inferencing tasks
are often co-scheduled on GPGPU-style edge platforms.
Existing works advocate using different NN parameters
for the same detection task in different environments.
However, realizing such approaches \ldots{}",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2023:CFD,
author = "Yanze Huang and Kui Wen and Limei Lin and Li Xu and
Sun-Yuan Hsieh",
title = "Component Fault Diagnosability of Hierarchical Cubic
Networks",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "39:1--39:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3577018",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3577018",
abstract = "The fault diagnosability of a network indicates the
self-diagnosis ability of the network, thus it is an
important measure of robustness of the network. As a
neoteric feature for measuring fault diagnosability,
the $r$-component diagnosability \ldots{}",
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Nie:2023:CMD,
author = "Qi Nie and Sharad Malik",
title = "{CNNFlow}: Memory-driven Data Flow Optimization for
Convolutional Neural Networks",
journal = j-TODAES,
volume = "28",
number = "3",
pages = "40:1--40:??",
month = may,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3577017",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Wed May 17 08:06:20 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3577017",
abstract = "Convolution Neural Networks (CNNs) are widely deployed
in computer vision applications. The datasets are
large, and the data reuse across different parts is
heavily interleaved. Given that memory access (SRAM and
especially DRAM) is more expensive in both \ldots{}",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{deOliveira:2023:MOO,
  author =       "Ricardo Gonzalez de Oliveira and Nicolas Navet and
                 Achim Henkel",
  title =        "Multi-Objective Optimization for Safety-Related
                 Available {E\slash E} Architectures Scoping Highly
                 Automated Driving Vehicles",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "41:1--41:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3582004",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582004",
  abstract =     "Megatrends such as Highly Automated Driving (HAD) (SAE
                 $\geq$ Level 3), electrification, and connectivity are
                 reshaping the automotive industry. Together with the
                 new technologies, the business models will also evolve,
                 opening up new possibilities and new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Mahmoud:2023:LEP,
  author =       "Mervat M. A. Mahmoud and Nahla E. Elashkar and Heba H.
                 Draz",
  title =        "Low-energy Pipelined Hardware Design for Approximate
                 Medium Filter",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "42:1--42:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3582005",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582005",
  abstract =     "Image and video processing algorithms are currently
                 crucial for many applications. Hardware implementation
                 of these algorithms provides higher speed for large
                 computation applications. Removing noise is often a
                 typical pre-processing step to enhance the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Cardona:2023:AMC,
  author =       "Jordi Cardona and Carles Hern{\'a}ndez and Jaume
                 Abella and Enrico Mezzetti and Francisco J. Cazorla",
  title =        "Accurately Measuring Contention in Mesh {NoCs} in
                 Time-Sensitive Embedded Systems",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "43:1--43:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3582006",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582006",
  abstract =     "The computing capacity demanded by embedded systems is
                 on the rise as software implements more
                 functionalities, ranging from best-effort entertainment
                 functions to performance-guaranteed safety-related
                 functions. Heterogeneous manycore processors, using
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Du:2023:TLR,
  author =       "Yajuan Du and Siyi Huang and Yao Zhou and Qiao Li",
  title =        "Towards {LDPC} Read Performance of {$3$D} Flash
                 Memories with Layer-induced Error Characteristics",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "44:1--44:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3585075",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3585075",
  abstract =     "3D flash memories have been widely developed to
                 further increase the storage capacity of SSDs by
                 vertically stacking multiple layers. However, this
                 special physical structure brings new error
                 characteristics. Existing studies have discovered that
                 there \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Zhou:2023:FAO,
  author =       "Yuhao Zhou and Zhenxue He and Jianhui Jiang and Jia
                 Liu and Juncai He and Tao Wang and Limin Xiao and Xiang
                 Wang",
  title =        "Fast Area Optimization Approach for {XNOR\slash
                 OR}-based Fixed Polarity {Reed--Muller} Logic Circuits
                 based on Multi-strategy Wolf Pack Algorithm",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "45:1--45:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3587818",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3587818",
  abstract =     "Area optimization is one of the most important
                 contents of circuits logic synthesis. The smaller area
                 has stronger testability and lower cost. However,
                 searching for a circuit with the smallest area in a
                 large-scale space of polarity is a combinatorial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2023:TPI,
  author =       "Senling Wang and Xihong Zhou and Yoshinobu Higami and
                 Hiroshi Takahashi and Hiroyuki Iwata and Yoichi Maeda
                 and Jun Matsushima",
  title =        "Test Point Insertion for Multi-Cycle Power-On
                 Self-Test",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "46:1--46:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3563552",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563552",
  abstract =     "Under the functional safety standard ISO26262,
                 automotive systems require testing in the field, such
                 as the power-on self-test (POST). Unlike the production
                 test, the POST requires reducing the test application
                 time to meet the indispensable test quality \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Le:2023:PBM,
  author =       "Trung Le and Zhao Zhang and Zhichun Zhu",
  title =        "Polling-Based Memory Interface",
  journal =      j-TODAES,
  volume =       "28",
  number =       "3",
  pages =        "47:1--47:??",
  month =        may,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3572919",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Wed May 17 08:06:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3572919",
  abstract =     "Non-volatile memory has been extensively researched as
                 the alternative for a DRAM-based system; however, the
                 traditional memory controller cannot efficiently track
                 and schedule operations for all the memory devices in
                 heterogeneous systems due to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Markov:2023:GEI,
  author =       "Igor Markov and Fan Yang and Li Shang and Hai Zhou",
  title =        "{Guest Editor}'s Introduction: Machine Learning for
                 {VLSI} Physical Design",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "48:1--48:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3592606",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3592606",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Kashyap:2023:IIM,
  author =       "Suhas Krishna Kashyap and Sule Ozev",
  title =        "{IMPRoVED}: Integrated Method to Predict {PostRouting}
                 setup Violations in Early Design Stages",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "49:1--49:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3572546",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3572546",
  abstract =     "The detail routing process is by far the most time
                 consuming during the physical design flow. Routing
                 starts with an estimation of timing slacks and aims to
                 meet the timing specifications at signoff. In this
                 paper, we propose an improved method to predict
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Hyun:2023:ROE,
  author =       "Daijoon Hyun and Sunwha Koh and Younggwang Jung and
                 Taeyoung Kim and Youngsoo Shin",
  title =        "Routability Optimization of Extreme Aspect Ratio
                 Design through Non-uniform Placement Utilization and
                 Selective Flip-flop Stacking",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "50:1--50:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3573387",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3573387",
  abstract =     "Circuits that are placed with very low (or high)
                 aspect ratio are susceptible to routing overflows. Such
                 designs are difficult to close and usually end up with
                 larger area with low area utilization. In this article,
                 we propose two routability optimization \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Utyamishev:2023:MPP,
  author =       "Dmitry Utyamishev and Inna Partin-Vaisband",
  title =        "Multiterminal Pathfinding in Practical {VLSI} Systems
                 with Deep Neural Networks",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "51:1--51:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3564930",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564930",
  abstract =     "A multiterminal obstacle-avoiding pathfinding approach
                 is proposed. The approach is inspired by deep image
                 learning. The key idea is based on training a
                 conditional generative adversarial network (cGAN) to
                 interpret a pathfinding task as a graphical \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Cheng:2023:DDG,
  author =       "Chung-Kuan Cheng and Chester Holtz and Andrew B. Kahng
                 and Bill Lin and Uday Mallappa",
  title =        "{DAGSizer}: a Directed Graph Convolutional Network
                 Approach to Discrete Gate Sizing of {VLSI} Graphs",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "52:1--52:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3577019",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3577019",
  abstract =     "The objective of a leakage recovery step is to make
                 use of positive slack and reduce power by performing
                 appropriate standard-cell swaps such as
                 threshold-voltage ($V_{th}$) or channel-length
                 reassignments. The resulting engineering change order
                 netlist needs \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2023:RDP,
  author =       "Ping-Wei Huang and Yao-Wen Chang",
  title =        "Routability-driven Power\slash Ground Network
                 Optimization Based on Machine Learning",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "53:1--53:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3587817",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3587817",
  abstract =     "The dynamic IR drop of a power/ground (PG) network is
                 a critical problem in modern circuit designs. Excessive
                 IR drop slows down circuit performance and causes
                 potential functional failures. Most industrial
                 practices tend to over-design the PG network for
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "53",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Dong:2023:WCP,
  author =       "Xiao Dong and Yufei Chen and Jun Chen and Yucheng Wang
                 and Ji Li and Tianming Ni and Zhiguo Shi and Xunzhao
                 Yin and Cheng Zhuo",
  title =        "Worst-case Power Integrity Prediction Using
                 Convolutional Neural Network",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "54:1--54:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3564932",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564932",
  abstract =     "Power integrity analysis is an essential step in power
                 distribution network (PDN) sign-off to ensure the
                 performance and reliability of chips. However, with the
                 growing PDN size and increasing scenarios to be
                 validated, it becomes very time- and resource-
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Lu:2023:EGS,
  author =       "Yi-Chen Lu and Siddhartha Nath and Sai Pentapati and
                 Sung Kyu Lim",
  title =        "{ECO-GNN}: Signoff Power Prediction Using Graph Neural
                 Networks with Subgraph Approximation",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "55:1--55:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3569942",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3569942",
  abstract =     "Modern electronic design automation flows depend on
                 both implementation and signoff tools to perform
                 timing-constrained power optimization through
                 Engineering Change Orders (ECOs), which involve gate
                 sizing and threshold-voltage ($V_{th}$)-assignment of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Yang:2023:CCE,
  author =       "Dingcheng Yang and Haoyuan Li and Wenjian Yu and
                 Yuanbo Guo and Wenjie Liang",
  title =        "{CNN-Cap}: Effective Convolutional Neural
                 Network-based Capacitance Models for Interconnect
                 Capacitance Extraction",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "56:1--56:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3564931",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564931",
  abstract =     "Accurate capacitance extraction is becoming more
                 important for designing integrated circuits under
                 advanced process technology. The pattern matching-based
                 full-chip extraction methodology delivers fast
                 computational speed but suffers from large error and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Hou:2023:DLF,
  author =       "Tianshu Hou and Peining Zhen and Zhigang Ji and
                 Hai-Bao Chen",
  title =        "A Deep Learning Framework for Solving Stress-based
                 Partial Differential Equations in Electromigration
                 Analysis",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "57:1--57:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3567424",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3567424",
  abstract =     "The electromigration-induced reliability issues (EM)
                 in very large scale integration (VLSI) circuits have
                 attracted continuous attention due to technology
                 scaling. Traditional EM methods lead to inaccurate
                 results incompatible with the advanced technology
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Zhang:2023:CCM,
  author =       "Qing Zhang and Huajie Huang and Jizuo Li and Yuhang
                 Zhang and Yongfu Li",
  title =        "{CmpCNN}: {CMP} Modeling with Transfer Learning {CNN}
                 Architecture",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "58:1--58:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3569941",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3569941",
  abstract =     "Performing chemical mechanical polishing (CMP)
                 modeling for physical verification on an integrated
                 circuit (IC) chip is vital to minimize its
                 manufacturing yield loss. Traditional CMP models
                 calculate post-CMP topography height of the IC's layout
                 based on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Aseeri:2023:PTA,
  author =       "Ahmad O. Aseeri",
  title =        "A Problem-tailored Adversarial Deep Neural
                 Network-Based Attack Model for Feed-Forward Physical
                 Unclonable Functions",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "59:1--59:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3557742",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3557742",
  abstract =     "With the exceeding advancement in technology, the
                 sophistication of attacks is considerably increasing.
                 Standard security methods fall short of achieving the
                 security essentials of IoT against physical attacks due
                 to the nature of IoTs being resource- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Bhattacharjee:2023:SGG,
  author =       "Abhiroop Bhattacharjee and Priyadarshini Panda",
  title =        "{SwitchX}: {Gmin-Gmax} Switching for Energy-efficient
                 and Robust Implementation of Binarized Neural Networks
                 on {ReRAM} Xbars",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "60:1--60:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3576195",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3576195",
  abstract =     "Memristive crossbars can efficiently implement
                 Binarized Neural Networks (BNNs) wherein the weights
                 are stored in high-resistance states (HRS) and
                 low-resistance states (LRS) of the synapses. We propose
                 SwitchX mapping of BNN weights onto ReRAM crossbars
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Huang:2023:STB,
  author =       "Po-Hsuan Huang and Chia-Heng Tu and Shen-Ming Chung
                 and Pei-Yuan Wu and Tung-Lin Tsai and Yi-An Lin and
                 Chun-Yi Dai and Tzu-Yi Liao",
  title =        "{SecureTVM}: a {TVM}-based Compiler Framework for
                 Selective Privacy-preserving Neural Inference",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "61:1--61:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3579049",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3579049",
  abstract =     "Privacy-preserving neural inference helps protect both
                 the user input data and the model weights from being
                 leaked to others during the inference of a deep
                 learning model. To achieve data protection, the
                 inference is often performed within a secure domain,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Ibrahim:2023:OPR,
  author =       "Abrar A. Ibrahim and Ahmed M. Y. Ibrahim and Mohamed
                 Watheq El-Kharashi and Mona Safar",
  title =        "Optimal Pattern Retargeting in {IEEE 1687} Networks: a
                 {SAT}-based Upper-Bound Computation",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "62:1--62:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3585074",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3585074",
  abstract =     "A growing number of embedded instruments is being
                 integrated into System-on-Chips for testing,
                 monitoring, and several other purposes. To standardize
                 their access protocols, the IEEE 1687 (IJTAG) standard
                 has defined a flexible network infrastructure.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Ferres:2023:CFF,
  author =       "Bruno Ferres and Olivier Muller and Fr{\'e}d{\'e}ric
                 Rousseau",
  title =        "A Chisel Framework for Flexible Design Space
                 Exploration through a Functional Approach",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "63:1--63:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3590769",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3590769",
  abstract =     "As the need for efficient digital circuits is ever
                 growing in the industry, the design of such systems
                 remains daunting, requiring both expertise and time. In
                 an attempt to close the gap between software
                 development and hardware design, powerful features
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Khan:2023:HEC,
  author =       "Muhammad Imran Khan",
  title =        "Harmonic Estimation and Comparative Analysis of
                 Ultra-High Speed Flip-Flop and Latch Topologies for Low
                 Power and High Performance Future Generation
                 Micro-\slash Nano Electronic Systems",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "64:1--64:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3590770",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3590770",
  abstract =     "This paper presents estimation and analysis of the
                 higher order harmonics, power features, and real
                 performance of flip-flop and master-slave latch
                 topologies. This research article outlines the impact
                 of transistor model quality and input signal \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{He:2023:SEM,
  author =       "Xu He and Yao Wang and Chang Liu and Qiang Wu and Juan
                 Luo and Yang Guo",
  title =        "A Soft-Error Mitigation Approach Using Pulse Quenching
                 Enhancement at Detailed Placement for Combinational
                 Circuits",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "65:1--65:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3595637",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3595637",
  abstract =     "As technology continuously shrinks, radiation-induced
                 soft errors have become a great threat to the circuit
                 reliability. Among all the causes, the Single-Event
                 Transient (SET) effect is the dominating one for the
                 radiation-induced soft errors. SET-induced \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Kazerooni-Zand:2023:MBM,
  author =       "Reza Kazerooni-Zand and Mehdi Kamal and Ali
                 Afzali-Kusha and Massoud Pedram",
  title =        "Memristive-based Mixed-signal {CGRA} for Accelerating
                 Deep Neural Network Inference",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "66:1--66:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3595638",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3595638",
  abstract =     "In this paper, a mixed-signal coarse-grained
                 reconfigurable architecture (CGRA) for accelerating
                 inference in deep neural networks (DNNs) is presented.
                 It is based on performing dot-product computations
                 using analog computing to achieve a considerable
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Chu:2023:ADC,
  author =       "Cheng Chu and Cheng Liu and Dawen Xu and Ying Wang and
                 Tao Luo and Huawei Li and Xiaowei Li",
  title =        "Accelerating Deformable Convolution Networks with
                 Dynamic and Irregular Memory Accesses",
  journal =      j-TODAES,
  volume =       "28",
  number =       "4",
  pages =        "67:1--67:??",
  month =        jul,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3597431",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:08 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597431",
  abstract =     "Deformable convolution networks (DCNs) proposed to
                 address image recognition with geometric or photometric
                 variations typically involve deformable convolution
                 that convolves on arbitrary locations of input
                 features. The locations change with different
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Jiang:2023:ISS,
  author =       "Iris Hui-Ru Jiang and David Chinnery and Gracieli
                 Posser and Jens Lienig",
  title =        "Introduction to the Special Section on Advances in
                 Physical Design Automation",
  journal =      j-TODAES,
  volume =       "28",
  number =       "5",
  pages =        "68:1--68:??",
  month =        sep,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3604593",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:10 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604593",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Gopalakrishnan:2023:GMW,
  author =       "Ramprasath Srinivasa Gopalakrishnan and Meghna
                 Madhusudan and Arvind K. Sharma and Jitesh Poojary and
                 Soner Yaldiz and Ramesh Harjani and Steven M. Burns and
                 Sachin S. Sapatnekar",
  title =        "A Generalized Methodology for Well Island Generation
                 and Well-tap Insertion in Analog\slash Mixed-signal
                 Layouts",
  journal =      j-TODAES,
  volume =       "28",
  number =       "5",
  pages =        "69:1--69:??",
  month =        sep,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3580477",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:10 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3580477",
  abstract =     "Well island generation and well tap placement is an
                 important problem in analog/mixed-signal (AMS)
                 circuits. Well taps can only prevent latchups within a
                 certain radius of influence within a well island, and
                 hence must be appropriately inserted to cover
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Wei:2023:APP,
  author =       "Min Wei and Xingyu Tong and Yuan Wen and Jianli Chen
                 and Jun Yu and Wenxing Zhu and Yao-Wen Chang",
  title =        "Analytical Placement with {$3$D} {Poisson}'s Equation
                 and {ADMM}-based Optimization for Large-scale {2.5D}
                 Heterogeneous {FPGAs}",
  journal =      j-TODAES,
  volume =       "28",
  number =       "5",
  pages =        "70:1--70:??",
  month =        sep,
  year =         "2023",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3582554",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Mon Sep 18 09:07:10 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582554",
  abstract =     "As design complexity keeps increasing, the 2.5D
                 field-programmable gate array (FPGA) with large logic
                 capacity has become popular in modern circuit
                 applications. A 2.5D FPGA consists of multiple dies
                 connected through super long lines (SLLs) on an
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Transact. Des. Automat. Electron. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Hougardy:2023:FOD,
author = "Stefan Hougardy and Meike Neuwohner and Ulrike
Schorr",
title = "A Fast Optimal Double-row Legalization Algorithm",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "71:1--71:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3579844",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3579844",
abstract = "In Placement Legalization, it is often assumed that
(almost) all standard cells possess the same height and
can therefore be aligned in cell rows, which can then
be treated independently. However, this is no longer
true for recent technologies, where a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "71",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Daboul:2023:GIO,
author = "Siad Daboul and Stephan Held and Bento Natura and
Daniel Rotter",
title = "Global Interconnect Optimization",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "72:1--72:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3587044",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3587044",
abstract = "We propose a new comprehensive solution to global
interconnect optimization. Traditional buffering
algorithms mostly insert repeaters on a net-by-net
basis based on slacks and possibly guided by global
wires. We show how to integrate routing congestion,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "72",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhou:2023:MMR,
author = "Zhonghua Zhou and Yuxuan Pan and Guy G. F. Lemieux and
Andr{\'e} Ivanov",
title = "{MEDUSA}: a Multi-Resolution Machine Learning
Congestion Estimation Method for {$2$D} and {$3$D}
Global Routing",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "73:1--73:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3590768",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3590768",
abstract = "Routing congestion is one of the many factors that
need to be minimized during the physical design phase
of large integrated circuits. In this article, we
propose a novel congestion estimation method, called
MEDUSA, that consists of three parts: (1) a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "73",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zheng:2023:BVD,
author = "Su Zheng and Hao Geng and Chen Bai and Bei Yu and
Martin D. F. Wong",
title = "Boosting {VLSI} Design Flow Parameter Tuning with
Random Embedding and Multi-objective Trust-region
{Bayesian} Optimization",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "74:1--74:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3597931",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3597931",
abstract = "Modern very large-scale integration (VLSI) design
requires the implementation of integrated circuits
using electronic design automation (EDA) tools. Due to
the complexity of EDA algorithms, there are numerous
tool parameters that have imperative impacts \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Murali:2023:PSR,
author = "Gauthaman Murali and Anthony Agnesina and Sung Kyu
Lim",
title = "A {PPA} Study of Reinforced Placement Parameter
Autotuning: Pseudo-{$3$D} vs. True-{$3$D} Placers",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "75:1--75:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3582007",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3582007",
abstract = "3D Place and Route (P\&R) flows either involve true-3D
placement algorithms or use commercial 2D tools to
transform a 2D design into a 3D design. Irrespective of
the nature of the placers, several placement parameters
in these tools affect the quality of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "75",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Vanna-Iampikul:2023:GBM,
author = "Pruek Vanna-Iampikul and Yi-Chen Lu and Da Eun Shim
and Sung Kyu Lim",
title = "{GNN}-based Multi-bit Flip-flop Clustering and
Post-clustering Design Optimization for
Energy-efficient {$3$D} {ICs}",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "76:1--76:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3588570",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3588570",
abstract = "In high-performance three-dimensional Integrated
Circuits (3D ICs), clock networks consume a large
portion of the full-chip power. However, no previous 3D
IC work has ever optimized 3D clock networks for both
power and performance simultaneously, which \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "76",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wu:2023:IBS,
author = "Jun-Sheng Wu and Chi-An Pan and Yi-Yu Liu",
title = "{ILP}-based Substrate Routing with Mismatched Via
Dimension Consideration for Wire-bonding {FBGA} Package
Design",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "77:1--77:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3579843",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3579843",
abstract = "With the rapidly growing demand for system-level
integration, package substrates have become one of the
most important carriers in semiconductor industry. Fine
pitch ball grid array (FBGA) packaging is a widely used
technology thanks to its relative cost-effectiveness \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "77",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2023:CPN,
author = "Yanjiang Liu and Junwei Li and Tongzhou Qu and Zibin
Dai",
title = "{CBDC-PUF}: a Novel Physical Unclonable Function
Design Framework Utilizing Configurable Butterfly Delay
Chain Against Modeling Attack",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "78:1--78:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3588435",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3588435",
abstract = "Physical unclonable function (PUF) is a promising
security-based primitive, which provides an extremely
large number of responses for key generation and
authentication applications. Various PUFs have been
developed as central building blocks in \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "78",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Aghaeekiasaraee:2023:CFR,
author = "Erfan Aghaeekiasaraee and Aysa Fakheri Tabrizi and
Tiago Augusto Fontana and Renan Netto and Sheiny Fabre
Almeida and Upma Gandhi and Jos{\'e} Lu{\'\i}s
G{\"u}ntzel and David Westwick and Laleh Behjat",
title = "{CRP2.0}: a Fast and Robust Cooperation between
Routing and Placement in Advanced Technology Nodes",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "79:1--79:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3590962",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3590962",
abstract = "Traditionally, the placement and routing stages of a
physical design are performed separately. Because of
the additional complexities arising in advanced
technology nodes, they have become more interdependent.
Therefore, creating efficient cooperation \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhu:2023:DSE,
author = "Binwu Zhu and Xinyun Zhang and Yibo Lin and Bei Yu and
Martin Wong",
title = "{DRC-SG 2.0}: Efficient Design Rule Checking Script
Generation via Key Information Extraction",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "80:1--80:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3594666",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3594666",
abstract = "Design Rule Checking (DRC) is a critical step in
integrated circuit design. DRC requires formatted
scripts as the input to design rule checkers. However,
these scripts are manually generated in the foundry,
which is tedious and error prone for generation
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "80",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Kritikakou:2023:MMS,
author = "Angeliki Kritikakou and Stefanos Skalistis",
title = "Mitigating Mode-switch through Run-time Computation of
Response Time",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "81:1--81:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3597432",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3597432",
abstract = "Mixed-critical systems consist of applications with
different criticality. In these systems, different
confidence levels of Worst-Case Execution Time (WCET)
estimations are used. Dual criticality systems use a
less pessimistic, but with lower level of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "81",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2023:BIH,
author = "Zilu Wang and Xinming Shi and Xin Yao",
title = "A Brain-Inspired Hardware Architecture for
Evolutionary Algorithms Based on Memristive Arrays",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "82:1--82:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3598421",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3598421",
abstract = "Brain-inspired computing takes inspiration from the
brain to create energy-efficient hardware systems for
information processing, capable of performing highly
sophisticated tasks. Systems built with emerging
electronics, such as memristive devices, can \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "82",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Monjur:2023:HSR,
author = "Mohammad Monjur and Joshua Calzadillas and Qiaoyan
Yu",
title = "Hardware Security Risks and Threat Analyses in
Advanced Manufacturing Industry",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "83:1--83:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3603502",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3603502",
abstract = "The advanced manufacturing industry (AMI) faces many
unique challenges from the cyber-physical domain.
Security threats are originated from two integral
parts: software and hardware. Over the past decade,
software security has been addressed extensively,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "83",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Narang:2023:DPM,
author = "Gaurav Narang and Aryan Deshwal and Raid Ayoub and
Michael Kishinevsky and Janardhan Rao Doppa and Partha
Pratim Pande",
title = "Dynamic Power Management in Large Manycore Systems: a
Learning-to-Search Framework",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "84:1--84:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3603501",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3603501",
abstract = "The complexity of manycore System-on-chips (SoCs) is
growing faster than our ability to manage them to
reduce the overall energy consumption. Further, as SoC
design moves toward three-dimensional (3D)
architectures, the core's power density increases
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "84",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Tan:2023:IPC,
author = "Jingweijia Tan and Weiren Wang and Maodi Ma and
Xiaohui Wei and Kaige Yan",
title = "Improving the Performance of {CNN} Accelerator
Architecture under the Impact of Process Variations",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "85:1--85:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3604236",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3604236",
abstract = "Convolutional neural network (CNN) accelerators are
popular specialized platforms for efficient CNN
processing. As semiconductor manufacturing technology
scales down to nano scale, process variation
dramatically affects the chip's quality. Process
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "85",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2023:CAT,
author = "Meng-Jing Li and Yu-Chuan Yen and Yi-Ting Li and
Yung-Chih Chen and Chun-Yao Wang",
title = "A Constructive Approach for Threshold Function
Identification",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "86:1--86:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3606371",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3606371",
abstract = "Threshold Function (TF) is a subset of Boolean
function that can be represented with a single linear
threshold gate (LTG). In the research about threshold
logic, the identification of TF is an important task
that determines whether a given function is a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Yamin:2023:UAE,
author = "Nuzhat Yamin and Ganapati Bhat",
title = "Uncertainty-aware Energy Harvest Prediction and
Management for {IoT} Devices",
journal = j-TODAES,
volume = "28",
number = "5",
pages = "87:1--87:??",
month = sep,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3606372",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 18 09:07:10 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3606372",
abstract = "Internet of things (IoT) devices are popular in
several high-impact applications such as mobile
healthcare and digital agriculture. However, IoT
devices have limited operating lifetime due to their
small form factor. Harvesting energy from ambient
sources \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhang:2023:SKR,
author = "Ruisi Zhang and Shehzeen Hussain and Huili Chen and
Mojan Javaheripi and Farinaz Koushanfar",
title = "Systemization of Knowledge: Robust Deep Learning using
Hardware--Software Co-design in Centralized and
Federated Settings",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "88:1--88:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3616868",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3616868",
abstract = "Deep learning (DL) models are enabling a significant
paradigm shift in a diverse range of fields, including
natural language processing and computer vision, as
well as the design and automation of complex integrated
circuits. While the deep models --- and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lu:2023:SPI,
author = "Huaixi Lu and Yue Xing and Aarti Gupta and Sharad
Malik",
title = "{SoC} Protocol Implementation Verification Using
Instruction-Level Abstraction Specifications",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "89:1--89:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3610292",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3610292",
abstract = "In modern systems-on-chips, several hardware protocols
are used for communication and interaction among
different modules. These protocols are complex and need
to be implemented correctly for correct operation of
the system-on-chip. Therefore, protocol \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "89",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{He:2023:GLP,
author = "Xu He and Yao Wang and Zhiyong Fu and Yipei Wang and
Yang Guo",
title = "A General Layout Pattern Clustering Using Geometric
Matching-based Clip Relocation and Lower-bound Aided
Optimization",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "90:1--90:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3610293",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3610293",
abstract = "With the continuous shrinking of feature size,
detection of lithography hotspots has been raised as
one of the major concerns in
Design-for-Manufacturability (DFM) of semiconductor
processing. Hotspot detection, along with other DFM
measures, trades off \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "90",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chang:2023:HPM,
author = "Yajing Chang and Yingjian Yan and Chunsheng Zhu and
Yanjiang Liu",
title = "A High-performance Masking Design Approach for {Saber}
against High-order Side-channel Attack",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "91:1--91:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3611670",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3611670",
abstract = "Post-quantum cryptography (PQC) has become the most
promising cryptographic scheme against the threat of
quantum computing to conventional public-key
cryptographic schemes. Saber, as the finalist in the
third round of the PQC standardization procedure,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "91",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Venieris:2023:MMW,
author = "Stylianos I. Venieris and Javier Fernandez-Marques and
Nicholas D. Lane",
title = "Mitigating Memory Wall Effects in {CNN} Engines with
On-the-Fly Weights Generation",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "92:1--92:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3611673",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3611673",
abstract = "The unprecedented accuracy of convolutional neural
networks (CNNs) across a broad range of AI tasks has
led to their widespread deployment in mobile and
embedded settings. In a pursuit for high-performance
and energy-efficient inference, significant \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "92",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Choudhury:2023:EPF,
author = "Muhtadi Choudhury and Minyan Gao and Avinash Varna and
Elad Peer and Domenic Forte",
title = "Enhanced {PATRON}: Fault Injection and Power-aware
{FSM} Encoding Through Linear Programming",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "93:1--93:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3611669",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3611669",
abstract = "Since finite state machines (FSMs) regulate the
control flow in circuits, a computing system's security
might be breached by attacking the FSM. Physical
attacks are especially worrisome because they can
bypass software countermeasures. For example, an
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "93",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Dahiya:2023:MDS,
author = "Ayush Dahiya and Poornima Mittal and Rajesh Rohilla",
title = "Modified Decoupled Sense Amplifier with Improved
Sensing Speed for Low-Voltage Differential {SRAM}",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "94:1--94:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3611672",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3611672",
abstract = "A modified decoupled sense amplifier (MDSA) and
modified decoupled sense amplifier with NMOS
foot-switch is proposed for improved sensing in
differential SRAM for low-voltage operation at the
22-nm technology node. The MDSA and MDSANF both offer
notable \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "94",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Naseer:2023:QGA,
author = "Mahum Naseer and Osman Hasan and Muhammad Shafique",
title = "{QuanDA}: {GPU} Accelerated Quantitative Deep Neural
Network Analysis",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "95:1--95:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3611671",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3611671",
abstract = "Over the past years, numerous studies demonstrated the
vulnerability of deep neural networks (DNNs) to make
correct classifications in the presence of small noise.
This motivated the formal analysis of DNNs to ensure
that they delineate acceptable \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "95",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Rawat:2023:RSB,
author = "Bhawna Rawat and Poornima Mittal",
title = "A Reconfigurable {7T} {SRAM} Bit Cell for High Speed,
Power Saving and Low Voltage Application",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "96:1--96:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3616872",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3616872",
abstract = "The decreasing operational voltage and scaled
technology node for memory designing has widened the
gap between two crucial parameters for an SRAM ---
delay and power. As the demand for internet of things
is increasing, the need for round the clock \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "96",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sivakumar:2023:SAL,
author = "S. Sivakumar and John Jose",
title = "Self Adaptive Logical Split Cache Techniques for
Delayed Aging of {NVM LLC}",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "97:1--97:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3616871",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3616871",
abstract = "Due to the technological advancements in the last few
decades, several applications have emerged that demand
more computing power and on-chip and off-chip memories.
However, the scaling of memory technologies is not at
par with computing throughput of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "97",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Esper:2023:ASF,
author = "Khalil Esper and Stefan Wildermann and J{\"u}rgen
Teich",
title = "Automatic Synthesis of {FSMs} for Enforcing
Non-functional Requirements on {MPSoCs} Using
Multi-objective Evolutionary Algorithms",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "98:1--98:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3617832",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3617832",
abstract = "Embedded system applications often require guarantees
regarding non-functional properties when executed on a
given MPSoC platform. Examples of such requirements
include real-time, energy, or safety properties on
corresponding programs. One option to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "98",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Senapati:2023:TTA,
author = "Debabrata Senapati and Kousik Rajesh and Chandan Karfa
and Arnab Sarkar",
title = "{TMDS}: Temperature-aware Makespan Minimizing {DAG}
Scheduler for Heterogeneous Distributed Systems",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "99:1--99:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3616869",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3616869",
abstract = "To meet application-specific performance demands,
recent embedded platforms often involve the use of
intricate micro-architectural designs and very small
feature sizes leading to complex chips with
multi-million gates. Such ultra-high gate densities
often \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "99",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Hong:2023:PMC,
  author = {Qinghui Hong and Richeng Huang and Pingdan Xiao and Jun Li and
    Jingru Sun and Jiliang Zhang},
  title = {Programmable In-memory Computing Circuit of
    {Fast Hartley Transform}},
  journal = j-TODAES,
  volume = {28},
  number = {6},
  pages = {100:1--100:??},
  month = nov,
  year = {2023},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3618112},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Nov 10 09:53:53 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3618112},
  abstract = {Discrete Hartley transform is a core component of digital
    signal processing because of its advantages of fast computing speed
    and less power consumption. Traditional FPGA-based implementation
    methods have the disadvantage of high latency, which cannot
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {100},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Kundu:2023:MTF,
author = "Debraj Kundu and Sudip Roy",
title = "Multi-target Fluid Mixing in {MEDA} Biochips: Theory
and an Attempt toward Waste Minimization",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "101:1--101:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3622785",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3622785",
abstract = "Sample preparation is an inherent procedure of many
biochemical applications, and digital microfluidic
biochips (DMBs) have proved to be very effective in
performing such a procedure. In a single mixing step,
conventional DMBs can mix two droplets in a 1:1
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "101",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhou:2023:SLR,
  author = {Shanglin Zhou and Mikhail A. Bragin and Deniz Gurevin and
    Lynn Pepin and Fei Miao and Caiwen Ding},
  title = {Surrogate {Lagrangian} Relaxation: a Path to Retrain-Free Deep
    Neural Network Pruning},
  journal = j-TODAES,
  volume = {28},
  number = {6},
  pages = {102:1--102:??},
  month = nov,
  year = {2023},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3624476},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Nov 10 09:53:53 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3624476},
  abstract = {Network pruning is a widely used technique to reduce
    computation cost and model size for deep neural networks. However,
    the typical three-stage pipeline (i.e., training, pruning, and
    retraining (fine-tuning)) significantly increases the overall
    training \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {102},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Ding:2023:TMP,
  author = {Bo Ding and Jinglei Huang and Junpeng Wang and Qi Xu and
    Song Chen and Yi Kang},
  title = {Task Modules Partitioning, Scheduling and Floorplanning for
    Partially Dynamically Reconfigurable Systems with Heterogeneous
    Resources},
  journal = j-TODAES,
  volume = {28},
  number = {6},
  pages = {103:1--103:??},
  month = nov,
  year = {2023},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3625295},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Nov 10 09:53:53 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3625295},
  abstract = {Some field programmable gate arrays (FPGAs) can be partially
    dynamically reconfigurable with heterogeneous resources distributed
    on the chip. FPGA-based partially dynamically reconfigurable system
    (FPGA-PDRS) can be used to accelerate computing and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {103},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Lin:2023:SRB,
  author = {Wenxiong Lin and Haojie Wu and Peng Gao and Wenjun Luo and
    Shuting Cai and Xiaoming Xiong},
  title = {Sequential Routing-based Time-division Multiplexing
    Optimization for Multi-{FPGA} Systems},
  journal = j-TODAES,
  volume = {28},
  number = {6},
  pages = {104:1--104:??},
  month = nov,
  year = {2023},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3626322},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Fri Nov 10 09:53:53 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3626322},
  abstract = {Multi-field programming gate array (FPGA) systems are widely
    used in various circuit design-related areas, such as hardware
    emulation, virtual prototypes, and chiplet design methodologies.
    However, a physical resource clash between inter-FPGA signals and
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {104},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Praveen:2023:DER,
author = "Pushkar Praveen and R. K. Singh",
title = "Design of Enhanced Reversible {9T} {SRAM} Design for
the Reduction in Sub-threshold Leakage Current with 14nm
{FinFET} Technology",
journal = j-TODAES,
volume = "28",
number = "6",
pages = "105:1--105:??",
month = nov,
year = "2023",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3616538",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Nov 10 09:53:53 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3616538",
abstract = "Power dissipation is considered one of the important
issues in low power Very-large-scale integration (VLSI)
circuit design and is related to the threshold voltage.
Generally, the sub-threshold leakage current and the
leakage power dissipation are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "105",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ni:2024:ISI,
  author = {Tianming Ni and Xiaoqing Wen and Hussam Amrouch and Cheng Zhuo
    and Peilin Song},
  title = {Introduction to the Special Issue on Design for Testability and
    Reliability of Security-aware Hardware},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {1:1--1:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3631476},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3631476},
  abstract = {The research on design for testability and reliability of
    security-aware hardware has been important in both academia and
    industry. With ever-growing globalization, commercial hardware
    design, manufacturing, transportation, and supply now involve many
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {1},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Cui:2024:ERO,
author = "Yijun Cui and Jiang Li and Yunpeng Chen and Chenghua
Wang and Chongyan Gu and M{\'a}ire O'Neill and Weiqiang
Liu",
title = "An Efficient Ring Oscillator {PUF} Using Programmable
Delay Units on {FPGA}",
journal = j-TODAES,
volume = "29",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3593807",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Jan 15 11:14:18 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3593807",
abstract = "The ring oscillator (RO) PUF can be implemented on
different FPGA platforms with high uniqueness and
reliability. To decrease the hardware cost of
conventional RO PUFs, a new design using the
programmable delay units is proposed, namely, PRO PUF.
The \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2024:PLC,
  author = {Taixin Li and Boran Sun and Hongtao Zhong and Yixin Xu and
    Vijaykrishnan Narayanan and Liang Shi and Tianyi Wang and Yao Yu and
    Thomas K{\"a}mpfe and Kai Ni and Huazhong Yang and Xueqing Li},
  title = {{ProtFe}: Low-Cost Secure Power Side-Channel Protection for
    General and Custom {FeFET}-Based Memories},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {3:1--3:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3604589},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3604589},
  abstract = {Ferroelectric Field Effect Transistors (FeFETs) have spurred
    increasing interest in both memories and computing applications,
    thanks to their CMOS compatibility, low-power operation, and high
    scalability. However, new security threats to the FeFET-based
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {3},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Pan:2024:CEP,
  author = {Zijin Pan and Xunyu Li and Weiquan Hao and Runyu Miao and
    Albert Wang},
  title = {On-chip {ESD} Protection Design Methodologies by {CAD}
    Simulation},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {4:1--4:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3593808},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3593808},
  abstract = {Electrostatic discharge (ESD) can cause malfunction or
    failure of integrated circuits (ICs). On-chip ESD protection design
    is a major IC design-for-reliability (DfR) challenge, particularly
    for complex chips made in advanced technology nodes. Traditional
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {4},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Bian:2024:RAS,
  author = {Jingchang Bian and Zhengfeng Huang and Peng Ye and Zhao Yang
    and Huaguo Liang},
  title = {A Reliability-Aware Splitting Duty-Cycle Physical Unclonable
    Function Based on Trade-off Process, Voltage, and Temperature
    Variations},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {5:1--5:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3594667},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3594667},
  abstract = {The physical unclonable function (PUF) is a hardware
    security primitive that can be used to prevent malicious attacks
    aimed at obtaining device information at the hardware level. The
    ring oscillator (RO) PUF has attracted considerable research
    attention. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {5},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Zhang:2024:HTS,
  author = {Yuan Zhang and Jiliang Zhang},
  title = {A High Throughput {STR}-based {TRNG} by Jitter Precise
    Quantization Superposing},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {6:1--6:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3606373},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/prng.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3606373},
  abstract = {With the rapid development of integrated circuits and the
    continuous progress of computing capability, higher demands have
    been placed on the security and speed of data encryption in security
    systems. As a basic hardware security primitive, the true \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {6},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Xiang:2024:TCL,
  author = {Dong Xiang},
  title = {Test Compression for Launch-on-Capture Transition Fault
    Testing},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {7:1--7:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3597433},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3597433},
  abstract = {A new low-power test compression scheme, called Dcompress,
    is proposed for launch-on-capture transition fault testing by using
    a new seed encoding scheme, a new design for testability
    architecture, and a new low-power test application procedure. The
    new \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {7},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Bi:2024:AVA,
  author = {Yongtian Bi and Qi Xu and Hao Geng and Song Chen and Yi Kang},
  title = {{AD$^2$VNCS}: Adversarial Defense and Device Variation-tolerance
    in Memristive Crossbar-based Neuromorphic Computing Systems},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {8:1--8:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3600231},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3600231},
  abstract = {In recent years, memristive crossbar-based neuromorphic
    computing systems (NCS) have obtained extremely high performance in
    neural network acceleration. However, adversarial attacks and
    conductance variations of memristors bring reliability challenges to
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {8},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Calzada:2024:HIS,
  author = {Paul E. Calzada and Md. Sami {Ul Islam Sami} and
    Kimia Zamiri Azar and Fahim Rahman and Farimah Farahmandi and
    Mark Tehranipoor},
  title = {Heterogeneous Integration Supply Chain Integrity Through
    Blockchain and {CHSM}},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {9:1--9:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3625823},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3625823},
  abstract = {Over the past few decades, electronics have become
    commonplace in government, commercial, and social domains. These
    devices have developed rapidly, as seen in the prevalent use of
    system-on-chips rather than separate integrated circuits on a single
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {9},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Cui:2024:RAA,
  author = {Xiaole Cui and Mingqi Yin and Hanqing Liu and Xiaoxin Cui},
  title = {The Resistance Analysis Attack and Security Enhancement of the
    {IMC LUT} Based on the Complementary Resistive Switch Cells},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {10:1--10:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3616870},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3616870},
  abstract = {The resistive random access memory (RRAM) based in-memory
    computing (IMC) is an emerging architecture to address the challenge
    of the ``memory wall'' problem. The complementary resistive switch
    (CRS) cell connects two bipolar RRAM elements anti-serially to
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {10},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Xiao:2024:IRI,
  author = {Jie Xiao and Yingying Ge and Ru Wang and Jungang Lou},
  title = {{ICP-RL}: Identifying Critical Paths for Fault Diagnosis Using
    Reinforcement Learning},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {11:1--11:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3610294},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3610294},
  abstract = {Identifying the critical paths is crucial to reducing the
    complexity of performance analysis and reliability calculation for
    logic circuits. In this article, we propose a method for identifying
    the critical path in a combination circuit using a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {11},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Guo:2024:YOA,
  author = {Nanlin Guo and Fulin Peng and Jiahe Shi and Fan Yang and
    Jun Tao and Xuan Zeng},
  title = {Yield Optimization for Analog Circuits over Multiple Corners via
    {Bayesian} Neural Networks: Enhancing Circuit Reliability under
    Environmental Variation},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {12:1--12:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3626321},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3626321},
  abstract = {The reliability of circuits is significantly affected by
    process variations in manufacturing and environmental variation
    during operation. Current yield optimization algorithms take process
    variations into consideration to improve circuit reliability.
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {12},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Peng:2024:CTD,
  author = {Qingsong Peng and Jingchang Bian and Zhengfeng Huang and
    Senling Wang and Aibin Yan},
  title = {A Compact {TRNG} Design for {FPGA} Based on the Metastability of
    {RO}-driven Shift Registers},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {13:1--13:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3610295},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/prng.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3610295},
  abstract = {True random number generators (TRNGs), as an important
    component of security systems, have received a lot of attention for
    their related research. The previous researches have provided a
    large number of TRNG solutions, however, they still failed to reach
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {13},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Sun:2024:LLD,
  author = {Rihui Sun and Pengfei Qiu and Yongqiang Lyu and Jian Dong and
    Haixia Wang and Dongsheng Wang and Gang Qu},
  title = {{Lightning}: Leveraging {DVFS-induced} Transient Fault Injection
    to Attack Deep Learning Accelerator of {GPUs}},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {14:1--14:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3617893},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3617893},
  abstract = {Graphics Processing Units (GPU) are widely used as deep
    learning accelerators because of its high performance and low power
    consumption. Additionally, it remains secure against
    hardware-induced transient fault injection attacks, a classic type
    of attacks \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {14},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Saglican:2024:MDV,
  author = {Enes Sa{\u{g}}lican and Engin Afacan},
  title = {{MOEA\slash D} vs. {NSGA-II}: a Comprehensive Comparison for
    Multi\slash Many Objective Analog\slash {RF} Circuit Optimization
    through a Generic Benchmark},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {15:1--15:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3626096},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3626096},
  abstract = {Thanks to the enhanced computational capacity of modern
    computers, even sophisticated analog/radio frequency (RF) circuit
    sizing problems can be solved via electronic design automation (EDA)
    tools. Recently, several analog/RF circuit optimization \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {15},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Rapp:2024:NAI,
  author = {Martin Rapp and Heba Khdr and Nikita Krohmer and
    J{\"o}rg Henkel},
  title = {{NPU}-Accelerated Imitation Learning for Thermal Optimization of
    {QoS}-Constrained Heterogeneous Multi-Cores},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {16:1--16:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3626320},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3626320},
  abstract = {Thermal optimization of a heterogeneous clustered multi-core
    processor under user-defined QoS targets requires application
    migration and DVFS. However, selecting the core to execute each
    application and the VF levels of each cluster is a complex problem
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {16},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Dewan:2024:CAM,
  author = {Monzurul Islam Dewan and Sheng-En David Lin and Dae Hyun Kim},
  title = {Construction of All Multilayer Monolithic {RSMTs} and Its
    Application to Monolithic {$3$D} {IC} Routing},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {17:1--17:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3626958},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3626958},
  abstract = {Monolithic three-dimensional (M3D) integration allows
    ultra-thin silicon tier stacking in a single package. The
    high-density stacking is acquiring interest and is becoming more
    popular for smaller footprint areas, shorter wirelength, higher
    performance, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {17},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Chhabria:2024:MLA,
  author = {Vidya A. Chhabria and Wenjing Jiang and Andrew B. Kahng and
    Sachin S. Sapatnekar},
  title = {A Machine Learning Approach to Improving Timing Consistency
    between Global Route and Detailed Route},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {18:1--18:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3626959},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3626959},
  abstract = {Due to the unavailability of routing information in design
    stages prior to detailed routing (DR), the tasks of timing
    prediction and optimization pose major challenges. Inaccurate timing
    prediction wastes design effort, hurts circuit performance, and may
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {18},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Pandey:2024:NDT,
  author = {Shailja Pandey and Lokesh Siddhu and Preeti Ranjan Panda},
  title = {{NeuroCool}: Dynamic Thermal Management of {$3$D} {DRAM} for
    Deep Neural Networks through Customized Prefetching},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {19:1--19:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3630012},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3630012},
  abstract = {Deep neural network (DNN) implementations are typically
    characterized by huge datasets and concurrent computation, resulting
    in a demand for high memory bandwidth due to intensive data movement
    between processors and off-chip memory. Performing DNN \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {19},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Bai:2024:BER,
  author = {Chen Bai and Qi Sun and Jianwang Zhai and Yuzhe Ma and Bei Yu
    and Martin D. F. Wong},
  title = {{BOOM-Explorer}: {RISC-V} {BOOM} Microarchitecture Design Space
    Exploration},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {20:1--20:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3630013},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/risc-v.bib;
    https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3630013},
  abstract = {Microarchitecture parameters tuning is critical in the
    microprocessor design cycle. It is a non-trivial design space
    exploration (DSE) problem due to the large solution space,
    cycle-accurate simulators' modeling inaccuracy, and high simulation
    runtime for performance evaluations. Previous methods require
    massive expert efforts to construct interpretable equations or high
    computing resource demands to train black-box prediction models.
    This article follows the black-box methods due to better solution
    qualities than analytical methods in general. We summarize two
    learned lessons and propose BOOM-Explorer accordingly. First,
    embedding microarchitecture domain knowledge in the DSE improves the
    solution quality. Second, BOOM-Explorer makes the microarchitecture
    DSE for register-transfer-level designs within the limited time
    budget feasible. We enhance BOOM-Explorer with the
    diversity-guidance, further improving the algorithm performance.
    Experimental results with RISC-V Berkeley-Out-of-Order Machine under
    7-nm technology show that our proposed methodology achieves an
    average of 18.75\% higher Pareto hypervolume, 35.47\% less average
    distance to reference set, and 65.38\% less overall running time
    compared to previous approaches.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {20},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Li:2024:MFO,
  author = {Wanqian Li and Yinhe Han and Xiaoming Chen},
  title = {Mathematical Framework for Optimizing Crossbar Allocation for
    {ReRAM}-based {CNN} Accelerators},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {21:1--21:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3631523},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3631523},
  abstract = {The resistive random-access memory (ReRAM) has widely been
    used to accelerate convolutional neural networks (CNNs) thanks to
    its analog in-memory computing capability. ReRAM crossbars not only
    store layers' weights, but also perform in-situ matrix-vector
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {21},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@Article{Wu:2024:FDC,
  author = {Dan Wu and Peng Chen and Thilini Kaushalya Bandara and
    Zhaoying Li and Tulika Mitra},
  title = {{Flip}: Data-centric Edge {CGRA} Accelerator},
  journal = j-TODAES,
  volume = {29},
  number = {1},
  pages = {22:1--22:??},
  month = jan,
  year = {2024},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/3631118},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Mon Jan 15 11:14:18 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
  URL = {https://dl.acm.org/doi/10.1145/3631118},
  abstract = {Coarse-Grained Reconfigurable Arrays (CGRA) are promising
    edge accelerators due to the outstanding balance in flexibility,
    performance, and energy efficiency. Classic CGRAs statically map
    compute operations onto the processing elements (PE) and route the
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Transact. Des. Automat. Electron. Syst.},
  articleno = {22},
  fjournal = {ACM Transactions on Design Automation of Electronic Systems},
  journal-URL = {https://dl.acm.org/loi/todaes},
}
@article{Wu:2024:SAM,
  author          = {Ying Wu and Chuangtao Chen and Weihua Xiao and Xuan
                     Wang and Chenyi Wen and Jie Han and Xunzhao Yin and
                     Weikang Qian and Cheng Zhuo},
  title           = {A Survey on Approximate Multiplier Designs for Energy
                     Efficiency: From Algorithms to Circuits},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = jan,
  volume          = {29},
  number          = {1},
  articleno       = {23},
  pages           = {23:1--23:??},
  doi             = {https://doi.org/10.1145/3610291},
  url             = {https://dl.acm.org/doi/10.1145/3610291},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Given the stringent requirements of energy efficiency
                     for Internet-of-Things edge devices, approximate
                     multipliers, as a basic component of many processors
                     and accelerators, have been constantly proposed and
                     studied for decades, especially in error-resilient
                     applications. The computation error and energy
                     efficiency largely depend on how and where the
                     approximation is introduced into a design. Thus, this
                     article aims to provide a comprehensive review of the
                     approximation techniques in multiplier designs ranging
                     from algorithms and architectures to circuits. We have
                     implemented representative approximate multiplier
                     designs in each category to understand the impact of
                     the design techniques on accuracy and efficiency. The
                     designs can then be effectively deployed in high-level
                     applications, such as machine learning, to gain energy
                     efficiency at the cost of slight accuracy loss.},
  acknowledgement = ack-nhfb,
  bibdate         = {Mon Jan 15 11:14:18 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                     https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Liang:2024:DAU,
  author          = {Tung-Che Liang and Yi-Chen Chang and Zhanwei Zhong and
                     Yaas Bigdeli and Tsung-Yi Ho and Krishnendu Chakrabarty
                     and Richard Fair},
  title           = {Dynamic Adaptation Using Deep Reinforcement Learning
                     for Digital Microfluidic Biochips},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {24},
  pages           = {24:1--24:??},
  doi             = {https://doi.org/10.1145/3633458},
  url             = {https://dl.acm.org/doi/10.1145/3633458},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {We describe an exciting new application domain for
                     deep reinforcement learning (RL): droplet routing on
                     digital microfluidic biochips (DMFBs). A DMFB consists
                     of a two-dimensional electrode array, and it
                     manipulates droplets of liquid to automatically
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Qian:2024:ERL,
  author          = {Yu Qian and Xuegong Zhou and Hao Zhou and Lingli
                     Wang},
  title           = {An Efficient Reinforcement Learning Based Framework
                     for Exploring Logic Synthesis},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {25},
  pages           = {25:1--25:??},
  doi             = {https://doi.org/10.1145/3632174},
  url             = {https://dl.acm.org/doi/10.1145/3632174},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Logic synthesis is a crucial step in electronic design
                     automation tools. The rapid developments of
                     reinforcement learning (RL) have enabled the automated
                     exploration of logic synthesis. Existing RL based
                     methods may lead to data inefficiency, and the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Wang:2024:SSG,
  author          = {Bo Wang and Sheng Ma and Shengbai Luo and Lizhou Wu
                     and Jianmin Zhang and Chunyuan Zhang and Tiejun Li},
  title           = {{SparGD}: a Sparse {GEMM} Accelerator with Dynamic
                     Dataflow},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {26},
  pages           = {26:1--26:??},
  doi             = {https://doi.org/10.1145/3634703},
  url             = {https://dl.acm.org/doi/10.1145/3634703},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Deep learning has become a highly popular research
                     field, and previously deep learning algorithms ran
                     primarily on CPUs and GPUs. However, with the rapid
                     development of deep learning, it was discovered that
                     existing processors could not meet the specific
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Kaur:2024:RRS,
  author          = {Jaspinder Kaur and Shirshendu Das},
  title           = {{RSPP}: Restricted Static Pseudo-Partitioning for
                     Mitigation of Cross-Core Covert Channel Attacks},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {27},
  pages           = {27:1--27:??},
  doi             = {https://doi.org/10.1145/3637222},
  url             = {https://dl.acm.org/doi/10.1145/3637222},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Cache timing channel attacks exploit the inherent
                     properties of cache memories: hit and miss time along
                     with the shared nature of the cache to leak secret
                     information. The side channel and covert channel are
                     the two well-known cache timing channel \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Kim:2024:OMP,
  author          = {Seok Young Kim and Jaewook Lee and Yoonah Paik and
                     Chang Hyun Kim and Won Jun Lee and Seon Wook Kim},
  title           = {Optimal Model Partitioning with Low-Overhead Profiling
                     on the {PIM}-based Platform for Deep Learning
                     Inference},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {28},
  pages           = {28:1--28:??},
  doi             = {https://doi.org/10.1145/3628599},
  url             = {https://dl.acm.org/doi/10.1145/3628599},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Recently Processing-in-Memory (PIM) has become a
                     promising solution to achieve energy-efficient
                     computation in data-intensive applications by placing
                     computation near or inside the memory. In most Deep
                     Learning (DL) frameworks, a user manually partitions
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Niu:2024:ECS,
  author          = {Linwei Niu and Danda B. Rawat and Jonathan Musselwhite
                     and Zonghua Gu and Qingxu Deng},
  title           = {Energy-Constrained Scheduling for Weakly Hard
                     Real-Time Systems Using Standby-Sparing},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {29},
  pages           = {29:1--29:??},
  doi             = {https://doi.org/10.1145/3631587},
  url             = {https://dl.acm.org/doi/10.1145/3631587},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {For real-time embedded systems, QoS (Quality of
                     Service), fault tolerance, and energy budget constraint
                     are among the primary design concerns. In this
                     research, we investigate the problem of energy
                     constrained standby-sparing for both periodic and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@Article{Ardalani:2024:DCS,
author = "Newsha Ardalani and Saptadeep Pal and Puneet Gupta",
title = "{DeepFlow}: a Cross-Stack Pathfinding Framework for
Distributed {AI} Systems",
journal = j-TODAES,
volume = "29",
number = "2",
pages = "30:1--30:??",
month = mar,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3635867",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Tue Mar 19 08:17:52 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3635867",
abstract = "Over the past decade, machine learning model
complexity has grown at an extraordinary rate, as has
the scale of the systems training such large models.
However, there is an alarmingly low hardware
utilization (5--20\%) in large scale AI systems. The low
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@article{S:2024:SAS,
  author          = {Deepanjali S. and Noor Mahammad SK},
  title           = {Scalable and Accelerated Self-healing Control Circuit
                     Using Evolvable Hardware},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {31},
  pages           = {31:1--31:??},
  doi             = {https://doi.org/10.1145/3634682},
  url             = {https://dl.acm.org/doi/10.1145/3634682},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Controllers are mission-critical components of any
                     electronic design. By sending control signals, they
                     decide which and when other data path elements must
                     operate. Faults, especially Single Event Upset (SEU)
                     occurrence in these components, can lead to \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Lu:2024:GPA,
  author          = {Yi-Chen Lu and Haoxing Ren and Hao-Hsiang Hsiao and
                     Sung Kyu Lim},
  title           = {{GAN-Place}: Advancing Open Source Placers to
                     Commercial-quality Using Generative Adversarial
                     Networks and Transfer Learning},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {32},
  pages           = {32:1--32:??},
  doi             = {https://doi.org/10.1145/3636461},
  url             = {https://dl.acm.org/doi/10.1145/3636461},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Recently, GPU-accelerated placers such as DREAMPlace
                     and Xplace have demonstrated their superiority over
                     traditional CPU-reliant placers by achieving orders of
                     magnitude speed up in placement runtime. However, due
                     to their limited focus in placement \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Deng:2024:ERT,
  author          = {Libing Deng and Gang Zeng and Ryo Kurachi and Hiroaki
                     Takada and Xiongren Xiao and Renfa Li and Guoqi Xie},
  title           = {Enhanced Real-time Scheduling of {AVB} Flows in
                     Time-Sensitive Networking},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {33},
  pages           = {33:1--33:??},
  doi             = {https://doi.org/10.1145/3637878},
  url             = {https://dl.acm.org/doi/10.1145/3637878},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Time-Sensitive Networking (TSN) realizes high
                     bandwidth and time determinism for data transmission
                     and thus becomes the crucial communication technology
                     in time-critical systems. The Gate Control List (GCL)
                     is used to control the transmission of different
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Sankar:2024:TTA,
  author          = {Syam Sankar and Ruchika Gupta and John Jose and
                     Sukumar Nandi},
  title           = {{TROP}: {TRust-aware OPportunistic} Routing in {NoC}
                     with Hardware {Trojans}},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {34},
  pages           = {34:1--34:??},
  doi             = {https://doi.org/10.1145/3639821},
  url             = {https://dl.acm.org/doi/10.1145/3639821},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Multiple software and hardware intellectual property
                     (IP) components are combined on a single chip to form
                     Multi-Processor Systems-on-Chips (MPSoCs). Due to the
                     rigid time-to-market constraints, some of the IPs are
                     from outsourced third parties. Due to \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Huang:2024:ALV,
  author          = {Bo-Yuan Huang and Steven Lyubomirsky and Yi Li and
                     Mike He and Gus Henry Smith and Thierry Tambe and Akash
                     Gaonkar and Vishal Canumalla and Andrew Cheung and
                     Gu-Yeon Wei and Aarti Gupta and Zachary Tatlock and
                     Sharad Malik},
  title           = {Application-level Validation of Accelerator Designs
                     Using a Formal Software\slash Hardware Interface},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {35},
  pages           = {35:1--35:??},
  doi             = {https://doi.org/10.1145/3639051},
  url             = {https://dl.acm.org/doi/10.1145/3639051},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Ideally, accelerator development should be as easy as
                     software development. Several recent design
                     languages/tools are working toward this goal, but
                     actually testing early designs on real applications
                     end-to-end remains prohibitively difficult due to the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Tang:2024:MIP,
  author          = {Ke Tang and Lang Feng and Zhongfeng Wang},
  title           = {Mixed Integer Programming based Placement Refinement
                     by {RSMT} Model with Movable Pins},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {36},
  pages           = {36:1--36:??},
  doi             = {https://doi.org/10.1145/3639365},
  url             = {https://dl.acm.org/doi/10.1145/3639365},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Placement is a critical step in the physical design
                     for digital application specific integrated circuits
                     (ASICs), as it can directly affect the design qualities
                     such as wirelength and timing. For many domain specific
                     designs, the demands for high \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{NS:2024:POA,
  author          = {Karthik Somayaji NS and Peng Li},
  title           = {{Pareto} Optimization of Analog Circuits Using
                     Reinforcement Learning},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {37},
  pages           = {37:1--37:??},
  doi             = {https://doi.org/10.1145/3640463},
  url             = {https://dl.acm.org/doi/10.1145/3640463},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Analog circuit optimization and design presents a
                     unique set of challenges in the IC design process. Many
                     applications require the designer to optimize for
                     multiple competing objectives, which poses a crucial
                     challenge. Motivated by these practical \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Jiang:2024:RHF,
  author          = {Danping Jiang and Zibin Dai and Yanjiang Liu and
                     Zongren Zhang},
  title           = {{RGMU}: a High-flexibility and Low-cost Reconfigurable
                     {Galois} Field Multiplication Unit Design Approach for
                     {CGRCA}},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {38},
  pages           = {38:1--38:??},
  doi             = {https://doi.org/10.1145/3639820},
  url             = {https://dl.acm.org/doi/10.1145/3639820},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Finite field multiplication is a non-linear
                     transformation operator that appears in the majority of
                     symmetric cryptographic algorithms. Numerous specified
                     finite field multiplication units have been proposed as
                     a fundamental module in the coarse-grained \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Wang:2024:MLC,
  author          = {Jianfeng Wang and Zhonghao Chen and Jiahao Zhang and
                     Yixin Xu and Tongguang Yu and Ziheng Zheng and Enze Ye
                     and Sumitha George and Huazhong Yang and Yongpan Liu
                     and Kai Ni and Vijaykrishnan Narayanan and Xueqing Li},
  title           = {A Module-Level Configuration Methodology for
                     Programmable Camouflaged Logic},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {39},
  pages           = {39:1--39:??},
  doi             = {https://doi.org/10.1145/3640462},
  url             = {https://dl.acm.org/doi/10.1145/3640462},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Logic camouflage is a widely adopted technique that
                     mitigates the threat of intellectual property (IP)
                     piracy and overproduction in the integrated circuit
                     (IC) supply chain. Camouflaged logic achieves
                     functional obfuscation through physical-level
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Weerasena:2024:SEO,
  author          = {Hansika Weerasena and Prabhat Mishra},
  title           = {Security of Electrical, Optical, and Wireless On-chip
                     Interconnects: a Survey},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = mar,
  volume          = {29},
  number          = {2},
  articleno       = {40},
  pages           = {40:1--40:??},
  doi             = {https://doi.org/10.1145/3631117},
  url             = {https://dl.acm.org/doi/10.1145/3631117},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {The advancement of manufacturing technologies has
                     enabled the integration of more intellectual property
                     (IP) cores on the same system-on-chip (SoC). Scalable
                     and high throughput on-chip communication architecture
                     has become a vital component in today's \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Mar 19 08:17:52 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Dong:2024:DAE,
  author          = {Jinxin Dong and Pingqiang Zhou},
  title           = {Detecting Adversarial Examples Utilizing Pixel Value
                     Diversity},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = may,
  volume          = {29},
  number          = {3},
  articleno       = {41},
  pages           = {41:1--41:??},
  doi             = {https://doi.org/10.1145/3636460},
  url             = {https://dl.acm.org/doi/10.1145/3636460},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {In this article, we introduce two novel methods to
                     detect adversarial examples utilizing pixel value
                     diversity. First, we propose the concept of pixel value
                     diversity (which reflects the spread of pixel values in
                     an image) and two independent metrics \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri May 10 08:08:40 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@Article{Hassani:2024:EFA,
author = "Fatemeh Serajeh Hassani and Mohammad Sadrosadati and
Nezam Rohbani and Sebastian Pointner and Robert Wille
and Hamid Sarbazi-Azad",
title = "An Efficient {FPGA} Architecture with Turn-Restricted
Switch Boxes",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "42:1--42:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3643809",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3643809",
abstract = "Field-Programmable Gate Arrays (FPGAs)
employ a large number of SRAM cells to provide a
flexible routing architecture which have a significant
impact on the FPGA's area and power consumption. This
flexible routing allows for a rather easy \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@article{Zhao:2024:EEE,
  author          = {Yunping Zhao and Sheng Ma and Hengzhu Liu and Libo
                     Huang},
  title           = {{EPHA}: an Energy-efficient Parallel Hybrid
                     Architecture for {ANNs} and {SNNs}},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = may,
  volume          = {29},
  number          = {3},
  articleno       = {43},
  pages           = {43:1--43:??},
  doi             = {https://doi.org/10.1145/3643134},
  url             = {https://dl.acm.org/doi/10.1145/3643134},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Artificial neural networks (ANNs) and spiking neural
                     networks (SNNs) are two general approaches to achieve
                     artificial intelligence (AI). The former have been
                     widely used in academia and industry fields; the
                     latter, SNNs, are more similar to biological \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri May 10 08:08:40 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Zhao:2024:DPD,
  author          = {Aidong Zhao and Tianchen Gu and Zhaori Bi and Fan Yang
                     and Changhao Yan and Xuan Zeng and Zixiao Lin and
                     Wenchuang Hu and Dian Zhou},
  title           = {{D$^3$PBO}: Dynamic Domain Decomposition-based
                     Parallel {Bayesian} Optimization for Large-scale Analog
                     Circuit Sizing},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = may,
  volume          = {29},
  number          = {3},
  articleno       = {44},
  pages           = {44:1--44:??},
  doi             = {https://doi.org/10.1145/3643811},
  url             = {https://dl.acm.org/doi/10.1145/3643811},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Bayesian optimization (BO) is an efficient global
                     optimization method for expensive black-box functions,
                     but the expansion for high-dimensional problems and
                     large sample budgets still remains a severe challenge.
                     In order to extend BO for large-scale \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri May 10 08:08:40 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@Article{Pomeranz:2024:RCS,
author = "Irith Pomeranz",
title = "Reduced On-chip Storage of Seeds for Built-in Test
Generation",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "45:1--45:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3643810",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3643810",
abstract = "Logic built-in self-test (LBIST) approaches use an
on-chip logic block for test generation and thus enable
in-field testing. Recent reports of silent data
corruption underline the importance of in-field
testing. In a class of storage-based LBIST \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@article{Thakur:2024:VLL,
  author          = {Shailja Thakur and Baleegh Ahmad and Hammond Pearce
                     and Benjamin Tan and Brendan Dolan-Gavitt and Ramesh
                     Karri and Siddharth Garg},
  title           = {{VeriGen}: a Large Language Model for {Verilog} Code
                     Generation},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = may,
  volume          = {29},
  number          = {3},
  articleno       = {46},
  pages           = {46:1--46:??},
  doi             = {https://doi.org/10.1145/3643681},
  url             = {https://dl.acm.org/doi/10.1145/3643681},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {In this study, we explore the capability of Large
                     Language Models (LLMs) to automate hardware design by
                     automatically completing partial Verilog code, a common
                     language for designing and modeling digital systems. We
                     fine-tune pre-existing LLMs on Verilog \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri May 10 08:08:40 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Luo:2024:HTH,
  author          = {Yandong Luo and Shimeng Yu},
  title           = {{H3D}-Transformer: a Heterogeneous {3D} ({H3D})
                     Computing Platform for Transformer Model Acceleration
                     on Edge Devices},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = may,
  volume          = {29},
  number          = {3},
  articleno       = {47},
  pages           = {47:1--47:??},
  doi             = {https://doi.org/10.1145/3649219},
  url             = {https://dl.acm.org/doi/10.1145/3649219},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Prior hardware accelerator designs primarily focused
                     on single-chip solutions for 10 MB-class computer
                     vision models. The GB-class transformer models for
                     natural language processing (NLP) impose challenges on
                     existing accelerator design due to the massive
                     \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri May 10 08:08:40 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Pomeranz:2024:TDS,
  author          = {Irith Pomeranz},
  title           = {Two-dimensional Search Space for Extracting Broadside
                     Tests from Functional Test Sequences},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = may,
  volume          = {29},
  number          = {3},
  articleno       = {48},
  pages           = {48:1--48:??},
  doi             = {https://doi.org/10.1145/3650207},
  url             = {https://dl.acm.org/doi/10.1145/3650207},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Testing for delay faults after chip manufacturing is
                     critical to correct chip operation. Tests for delay
                     faults are applied using scan chains that provide
                     access to internal memory elements. As a result, a
                     circuit may operate under non-functional \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri May 10 08:08:40 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Brzozowski:2024:CAD,
  author          = {Ireneusz Brzozowski},
  title           = {Comparative Analysis of Dynamic Power Consumption of
                     Parallel Prefix Adder},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = may,
  volume          = {29},
  number          = {3},
  articleno       = {49},
  pages           = {49:1--49:??},
  doi             = {https://doi.org/10.1145/3651984},
  url             = {https://dl.acm.org/doi/10.1145/3651984},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {The Newcomb-Benford law, also known as Benford's law,
                     is the law of anomalous numbers stating that in many
                     real-life numerical datasets, including physical and
                     statistical ones, numbers have a small initial digit.
                     Numbers irregularity observed in nature \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri May 10 08:08:40 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@article{Rahman:2024:SES,
  author          = {Md Moshiur Rahman and Jim Geist and Daniel Xing and
                     Yuntao Liu and Ankur Srivastava and Travis Meade and
                     Yier Jin and Swarup Bhunia},
  title           = {Security Evaluation of State Space Obfuscation of
                     Hardware {IP} through a Red Team--Blue Team Practice},
  journal         = j-TODAES,
  fjournal        = {ACM Transactions on Design Automation of Electronic
                     Systems},
  ajournal        = {ACM Transact. Des. Automat. Electron. Syst.},
  year            = {2024},
  month           = may,
  volume          = {29},
  number          = {3},
  articleno       = {50},
  pages           = {50:1--50:??},
  doi             = {https://doi.org/10.1145/3640461},
  url             = {https://dl.acm.org/doi/10.1145/3640461},
  journal-url     = {https://dl.acm.org/loi/todaes},
  coden           = {ATASFO},
  issn            = {1084-4309 (print), 1557-7309 (electronic)},
  issn-l          = {1084-4309},
  abstract        = {Due to the inclination towards a fab-less model of
                     integrated circuit (IC) manufacturing, several
                     untrusted entities get white-box access to the
                     proprietary intellectual property (IP) blocks from
                     diverse vendors. To this end, the untrusted entities
                     pose \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate         = {Fri May 10 08:08:40 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/todaes.bib},
}
@Article{Pan:2024:RCA,
author = "Renjian Pan and Xin Li and Krishnendu Chakrabarty",
title = "Root-Cause Analysis with Semi-Supervised Co-Training
for Integrated Systems",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "51:1--51:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3649313",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3649313",
abstract = "Root-cause analysis for integrated systems has become
increasingly challenging due to their growing
complexity. To tackle these challenges, machine
learning (ML) has been applied to enhance root-cause
analysis. Nonetheless, ML-based root-cause analysis
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "51",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Prasad:2024:SSE,
author = "Govind Prasad and Bipin Mandi and Maifuz Ali",
title = "{SEDONUT}: a Single Event Double Node Upset Tolerant
{SRAM} for Terrestrial Applications",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "52:1--52:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3651985",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3651985",
abstract = "Radiation and its effect on neighboring nodes are
critical not only for space applications but also for
terrestrial applications at modern lower-technology
nodes. This may cause static random-access memory
(SRAM) failures due to single- and multi-node
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "52",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2024:HPA,
author = "Hongduo Liu and Yijian Qian and Youqiang Liang and Bin
Zhang and Zhaohan Liu and Tao He and Wenqian Zhao and
Jiangbo Lu and Bei Yu",
title = "A High-Performance Accelerator for Real-Time
Super-Resolution on Edge {FPGAs}",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "53:1--53:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3652855",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3652855",
abstract = "In the digital era, the prevalence of low-quality
images contrasts with the widespread use of
high-definition displays, primarily due to
low-resolution cameras and compression technologies.
Image super-resolution (SR) techniques, particularly
those \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "53",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2024:DRL,
author = "Chunlin Li and Kun Jiang and Yong Zhang and Lincheng
Jiang and Youlong Luo and Shaohua Wan",
title = "Deep Reinforcement Learning-based Mining Task
Offloading Scheme for Intelligent Connected Vehicles in
{UAV}-aided {MEC}",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "54:1--54:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3653451",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3653451",
abstract = "The convergence of unmanned aerial vehicle (UAV)-aided
mobile edge computing (MEC) networks and blockchain
transforms the existing mobile networking paradigm.
However, in the temporary hotspot scenario for
intelligent connected vehicles (ICVs) in UAV-aided
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "54",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Witharana:2024:ICT,
author = "Hasini Witharana and Aruna Jayasena and Prabhat
Mishra",
title = "Incremental Concolic Testing of Register-Transfer
Level Designs",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "55:1--55:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3655621",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3655621",
abstract = "Concolic testing is a scalable solution for automated
generation of directed tests for validation of hardware
designs. Unfortunately, concolic testing fails to cover
complex corner cases such as hard-to-activate branches.
In this article, we propose an \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "55",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Yang:2024:FEA,
author = "Bo Yang and Qi Xu and Hao Geng and Song Chen and Bei
Yu and Yi Kang",
title = "Floorplanning with Edge-aware Graph Attention Network
and Hindsight Experience Replay",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "56:1--56:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3653453",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3653453",
abstract = "In this article, we focus on chip floorplanning, which
aims to determine the location and orientation of
circuit macros simultaneously, so the chip area and
wirelength are minimized. As the highest level of
abstraction in hierarchical physical design, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "56",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Xian:2024:WJP,
author = "Juming Xian and Yan Xing and Shuting Cai and Weijun Li
and Xiaoming Xiong and Zhengfa Hu",
title = "{WCPNet}: Jointly Predicting Wirelength, Congestion
and Power for {FPGA} Using Multi-Task Learning",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "57:1--57:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3656170",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3656170",
abstract = "To speed up the design closure and improve the QoR of
FPGA, supervised single-task machine learning
techniques have been used to predict individual design
metric based on placement results. However, the design
objective is to achieve optimal performance \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "57",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sivakumar:2024:ELP,
author = "S. Sivakumar and John Jose and Vijaykrishnan
Narayanan",
title = "Enhancing Lifetime and Performance of {MLC NVM} Caches
Using Embedded Trace Buffers",
journal = j-TODAES,
volume = "29",
number = "3",
pages = "58:1--58:??",
month = may,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3659102",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri May 10 08:08:40 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3659102",
abstract = "Large volumes of on-chip and off-chip memory are
required by contemporary applications. Emerging
non-volatile memory technologies including STT-RAM,
PCM, and ReRAM are becoming popular for on-chip and
off-chip memories as a result of their desirable
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "58",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wu:2024:SML,
author = "Nan Wu and Yingjie Li and Hang Yang and Hanqiu Chen
and Steve Dai and Cong Hao and Cunxi Yu and Yuan Xie",
title = "Survey of Machine Learning for Software-assisted
Hardware Design Verification: Past, Present, and
Prospect",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "59:1--59:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3661308",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3661308",
abstract = "With the ever-increasing hardware design complexity
comes the realization that efforts required for
hardware verification increase at an even faster rate.
Driven by the push from the desired verification
productivity boost and the pull from leap-ahead
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "59",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Han:2024:CMC,
author = "Ruobing Han and Jun Chen and Bhanu Garg and Xule Zhou
and John Lu and Jeffrey Young and Jaewoong Sim and
Hyesoon Kim",
title = "{CuPBoP}: Making {CUDA} a Portable Language",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "60:1--60:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3659949",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3659949",
abstract = "CUDA is designed specifically for NVIDIA GPUs and is
not compatible with non-NVIDIA devices. Enabling CUDA
execution on alternative backends could greatly benefit
the hardware community by fostering a more diverse
software ecosystem.\par
To address the need for portability, our objective is
to develop a framework that meets key requirements,
such as extensive coverage, comprehensive end-to-end
support, superior performance, and hardware
scalability. Existing solutions that translate CUDA
source code into other high-level languages, however,
fall short of these goals.\par
In contrast to these source-to-source approaches, we
present a novel framework, CuPBoP, which treats CUDA as
a portable language in its own right. Compared to two
commercial source-to-source solutions, CuPBoP offers a
broader coverage and superior performance for the
CUDA-to-CPU migration. Additionally, we evaluate the
performance of CuPBoP against manually optimized CPU
programs, highlighting the differences between CPU
programs derived from CUDA and those that are manually
optimized.\par
Furthermore, we demonstrate the hardware scalability of
CuPBoP by showcasing its successful migration of CUDA
to AMD GPUs. To promote further research in this field,
we have released CuPBoP as an open-source resource.",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "60",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhao:2024:LBP,
author = "Xiang Zhao and Song Chen and Yi Kang",
title = "Load Balanced {PIM-Based} Graph Processing",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "61:1--61:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3659951",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3659951",
abstract = "Graph processing is widely used for many modern
applications, such as social networks, recommendation
systems, and knowledge graphs. However, processing
large-scale graphs on traditional Von Neumann
architectures is challenging due to the irregular graph
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "61",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Tian:2024:MRE,
author = "Huan Tian and Jiewen Tang and Jun Li and Zhibing Sha
and Fan Yang and Zhigang Cai and Jianwei Liao",
title = "Modeling Retention Errors of {$3$D} {NAND} Flash for
Optimizing Data Placement",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "62:1--62:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3659101",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3659101",
abstract = "Considering 3D NAND flash has a new property of
process variation (PV), which causes different raw bit
error rates (RBER) among different layers of the flash
block. This article builds a mathematical model for
estimating the retention errors of flash \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "62",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2024:CAW,
author = "Zhisheng Chen and Xu Hu and Wenzhong Guo and Genggeng
Liu and Jiaxuan Wang and Tsungyi Ho and Xing Huang",
title = "Capacity-Aware Wash Optimization with Dynamic Fluid
Scheduling and Channel Storage for Continuous-Flow
Microfluidic Biochips",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "63:1--63:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3659952",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3659952",
abstract = "Continuous-flow microfluidic biochips are gaining
increasing attention with promising applications for
automatically executing various laboratory procedures
in biology and biochemistry. Biochips with distributed
channel-storage architectures enable each \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "63",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2024:EWP,
author = "Jian-De Li and Sying-Jyan Wang and Katherine Shu-Min
Li and Tsung-Yi Ho",
title = "Enhanced Watermarking for Paper-Based Digital
Microfluidic Biochips",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "64:1--64:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3661309",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3661309",
abstract = "Paper-based digital microfluidic biochip (PB-DMFB)
technology provides a promising solution to many
biochemical applications. However, the PB-DMFB
manufacturing process may suffer from potential
security threats. For example, a Trojan insertion
attack may \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "64",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Spieck:2024:SBD,
author = "Jan Spieck and Stefan Wildermann and J{\"u}rgen
Teich",
title = "A Scenario-Based {DVFS}-Aware Hybrid Application
Mapping Methodology for {MPSoCs}",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "65:1--65:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3660633",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3660633",
abstract = "Sound techniques for mapping soft real-time
applications to resources are indispensable for meeting
the application deadlines and minimizing objectives
such as energy consumption, particularly on
heterogeneous MPSoC architectures. For applications
with \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "65",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Joshi:2024:SPS,
author = "Priyanka Joshi and Bodhisatwa Mazumdar",
title = "Semi-Permanent Stuck-At Fault Injection Attacks on
{Elephant} and {GIFT} Lightweight Ciphers",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "66:1--66:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3662734",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3662734",
abstract = "Fault attacks pose a potent threat to modern
cryptographic implementations, particularly those used
in physically approachable embedded devices in IoT
environments. Information security in such
resource-constrained devices is ensured using
lightweight \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "66",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Soni:2024:SBH,
author = "Lokesh Soni and Neeta Pandey",
title = "A Single Bitline Highly Stable, Low Power With High
Speed Half-Select Disturb Free {11T SRAM} Cell",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "67:1--67:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3653675",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3653675",
abstract = "A half-select disturb-free 11T (HF11T) static random
access memory (SRAM) cell with low power, better
stability and high speed is presented in this paper.
The proposed SRAM cell works well with bit-interleaving
design, which enhances soft-error immunity. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Esmaeilzadeh:2024:OSM,
author = "Hadi Esmaeilzadeh and Soroush Ghodrati and Andrew
Kahng and Joon Kyung Kim and Sean Kinzer and Sayak
Kundu and Rohan Mahapatra and Susmita Dey Manasi and
Sachin Sapatnekar and Zhiang Wang and Ziqing Zeng",
title = "An Open-Source {ML}-Based Full-Stack Optimization
Framework for Machine Learning Accelerators",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "68:1--68:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3664652",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3664652",
abstract = "Parameterizable machine learning (ML) accelerators are
the product of recent breakthroughs in ML. To fully
enable their design space exploration (DSE), we propose
a physical-design-driven, learning-based prediction
framework for hardware-accelerated deep \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "68",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Gandhi:2024:ARL,
author = "Upma Gandhi and Erfan Aghaeekiasaraee and Sahir and
Payam Mousavi and Ismail S. K. Bustany and Mathew E.
Taylor and Laleh Behjat",
title = "Applying Reinforcement Learning to Learn Best Net to
Rip and Re-Route in Global Routing",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "69:1--69:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3664286",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3664286",
abstract = "Physical designers typically employ heuristics to
solve challenging problems in global routing. However,
these heuristic solutions are not adaptable to the
ever-changing fabrication demands, and the experience
and creativity of designers can limit their \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "69",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lin:2024:CDC,
author = "Cheng-Hsien Lin and Kuan-Ting Chen and Yi-Yu Liu and
Allen C.-H. Wu and Tingting Hwang",
title = "A Cost-Driven Chip Partitioning Method for
Heterogeneous {$3$D} Integration",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "70:1--70:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3672558",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3672558",
abstract = "Three-dimensional integration circuit (3D IC) offers
significant benefits in terms of performance and cost.
Existing research in through-silicon via (TSV)-based 3D
IC partitioning has focused on minimizing the number of
TSVs to reduce costs. Partitioning \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "70",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{McDaniel:2024:RSH,
author = "Isaac McDaniel and Michael Zuzak and Ankur
Srivastava",
title = "Removal of {SAT-Hard} Instances in Logic Obfuscation
Through Inference of Functionality",
journal = j-TODAES,
volume = "29",
number = "4",
pages = "71:1--71:??",
month = jul,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3674903",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3674903",
abstract = "Logic obfuscation is a prominent approach to protect
intellectual property within integrated circuits during
fabrication. Many attacks on logic locking have been
proposed, particularly in the Boolean satisfiability
(SAT) attack family, leading to the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "71",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ogbogu:2024:DPE,
author = "Chukwufumnanya Ogbogu and Biresh Joardar and
Krishnendu Chakrabarty and Jana Doppa and Partha Pratim
Pande",
title = "Data Pruning-enabled High Performance and Reliable
Graph Neural Network Training on {ReRAM}-based
Processing-in-Memory Accelerators",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "72:1--72:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3656171",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3656171",
abstract = "Graph Neural Networks (GNNs) have achieved remarkable
accuracy in cognitive tasks such as predictive
analytics on graph-structured data. Hence, they have
become very popular in diverse real-world applications.
However, GNN training with large real-world \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "72",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2024:WWT,
author = "Tinghuan Chen and Hao Geng and Qi Sun and Sanping Wan
and Yongsheng Sun and Huatao Yu and Bei Yu",
title = "{Wages}: The Worst Transistor Aging Analysis for
Large-scale Analog Integrated Circuits via Domain
Generalization",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "73:1--73:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3659950",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3659950",
abstract = "Transistor aging leads to the deterioration of analog
circuit performance over time. The worst aging
degradation is used to evaluate the circuit
reliability. It is extremely expensive to obtain it
since several circuit stimuli need to be simulated. The
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "73",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2024:TTR,
author = "Hongfei Wang and Jingyao Li and Jiayi Wang and Zijun
Ping and Hongcan Xiong and Wei Liu and Dongmian Zou",
title = "Translating Test Responses to Images for
Test-termination Prediction via Multiple Machine
Learning Strategies",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "74:1--74:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3661310",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3661310",
abstract = "Failure diagnosis is a software-based, data-driven
procedure. Collecting an excessive amount of fail data
not only increases the overall test cost but can also
potentially reduce diagnostic resolution. Thus,
test-termination prediction is proposed to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Ghosh:2024:MBF,
author = "Devleena Ghosh and Sumana Ghosh and Ansuman Banerjee
and Raj Kumar Gajavelly and Sudhakar Surendran",
title = "{MAB-BMC}: a Formal Verification Enhancer by
Harnessing Multiple {BMC} Engines Together",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "75:1--75:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3675168",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3675168",
abstract = "In recent times, Bounded Model Checking (BMC) engines
have gained wide prominence in formal verification.
Different BMC engines exist, differing in their
optimization, representations and solving mechanisms
used to represent and navigate the underlying
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "75",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sabbagh:2024:ACA,
author = "Negar Aghapour Sabbagh and Bijan Alizadeh",
title = "Automatic Correction of Arithmetic Circuits in the
Presence of Multiple Bugs by {Groebner} Basis
Modification",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "76:1--76:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3672559",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3672559",
abstract = "One promising approach to verify large arithmetic
circuits is making use of Symbolic Computer Algebra
(SCA), where the circuit and the specification are
translated to a set of polynomials, and the
verification is performed by the ideal membership
testing. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "76",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2024:PEA,
author = "Changxu Liu and Hao Zhou and Patrick Dai and Li Shang
and Fan Yang",
title = "{PriorMSM}: an Efficient Acceleration Architecture for
Multi-Scalar Multiplication",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "77:1--77:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3678006",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3678006",
abstract = "Multi-Scalar Multiplication (MSM) is a computationally
intensive task that operates on elliptic curves based
on GF(P). It is commonly used in zero-knowledge proof
(ZKP), where it accounts for a significant portion of
the computation time required for \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "77",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wu:2024:POA,
author = "Xiaoqian Wu and Huaxiao Liu and Peng Wang and Lei Liu
and Zhenxue He",
title = "A Power Optimization Approach for Large-scale {RM-TB}
Dual Logic Circuits Based on an Adaptive Multi-Task
Intelligent Algorithm",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "78:1--78:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3677033",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3677033",
abstract = "Logic synthesis is a crucial step in integrated
circuit design, and power optimization is an
indispensable part of this process. However, power
optimization for large-scale Mixed Polarity
Reed--Muller (MPRM) logic circuits is an NP-hard
problem. In this \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "78",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Bagchi:2024:PPO,
author = "Aritra Bagchi and Dharamjeet and Ohm Rishabh and Manan
Suri and Preeti Ranjan Panda",
title = "{POEM}: Performance Optimization and Endurance
Management for Non-volatile Caches",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "79:1--79:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3653452",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3653452",
abstract = "Non-volatile memories (NVMs), with their high storage
density and ultra-low leakage power, offer promising
potential for redesigning the memory hierarchy in
next-generation Multi-Processor Systems-on-Chip
(MPSoCs). However, the adoption of NVMs in cache
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Xu:2024:DLE,
author = "Peng Xu and Siyuan Xu and Tinghuan Chen and Guojin
Chen and Tsungyi Ho and Bei Yu",
title = "{DeepOTF}: Learning Equations-constrained Prediction
for Electromagnetic Behavior",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "80:1--80:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3663476",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3663476",
abstract = "High-quality passive devices are becoming increasingly
important for the development of mobile devices and
telecommunications, but obtaining such devices through
simulation and analysis of electromagnetic (EM)
behavior is time-consuming. To address this \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "80",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Mukherjee:2024:HIH,
author = "Rijoy Mukherjee and Archisman Ghosh and Rajat Subhra
Chakraborty",
title = "{HLS-IRT}: Hardware {Trojan} Insertion through
Modification of Intermediate Representation During
High-Level Synthesis",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "81:1--81:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3663477",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3663477",
abstract = "Modern integrated circuit (IC) design incorporates the
usage of proprietary computer-aided design (CAD)
software and integration of third-party hardware
intellectual property (IP) cores. Subsequently, the
fabrication process for the design takes place in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "81",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Bhat:2024:ISI,
author = "Ganapati Bhat and Biresh Kumar Joardar and Mengying
Zhao",
title = "Introduction to the Special Issue on Embedded System
Software\slash Tools",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "82:1--82:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3682061",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3682061",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "82",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Deng:2024:OVI,
author = "Can Deng and Zhaoyun Chen and Yang Shi and Yimin Ma
and Mei Wen and Lei Luo",
title = "Optimizing {VLIW} Instruction Scheduling via a
Two-Dimensional Constrained Dynamic Programming",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "83:1--83:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3643135",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3643135",
abstract = "Typical embedded processors, such as Digital Signal
Processors (DSPs), usually adopt Very Long Instruction
Word (VLIW) architecture to improve computing
efficiency. The performance of VLIW processors heavily
relies on Instruction-Level Parallelism (ILP).
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "83",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lai:2024:GNS,
author = "Chengtao Lai and Wei Zhang",
title = "{gem5-NVDLA}: a Simulation Framework for Compiling,
Scheduling, and Architecture Evaluation on {AI}
System-on-Chips",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "84:1--84:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3661997",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3661997",
abstract = "Recent years have seen an increasing trend in
designing AI accelerators together with the rest of the
system, including CPUs and memory hierarchy. This trend
calls for high-quality simulators or analytical models
that enable such kind of co-exploration. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "84",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2024:ZZM,
author = "Ping-Xiang Chen and Dongjoo Seo and Changhoon Sung and
Jongheum Park and Minchul Lee and Huaicheng Li and
Matias Bj{\o}rling and Nikil Dutt",
title = "{ZoneTrace}: Zone Monitoring Tool for {F2FS} on {ZNS
SSDs}",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "85:1--85:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3656172",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3656172",
abstract = "We present ZoneTrace, a runtime monitoring tool for
the Flash-friendly File System (F2FS) on Zoned
Namespace (ZNS) Solid-state Drives (SSDs). ZNS SSD
organizes its storage into zones of sequential write
access. Due to ZNS SSD's sequential write nature,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "85",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Aghapour:2024:ACA,
author = "Ehsan Aghapour and Dolly Sapra and Andy Pimentel and
Anuj Pathania",
title = "{ARM-CO-UP}: {ARM COoperative} Utilization of
Processors",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "86:1--86:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3656472",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3656472",
abstract = "HMPSoCs combine different processors on a single chip.
They enable powerful embedded devices, which
increasingly perform ML inference tasks at the edge.
State-of-the-art HMPSoCs can perform on-chip embedded
inference using different processors, such as
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Jordao:2024:ISD,
author = "Rodolfo Jord{\~a}o and Matthias Becker and Ingo
Sander",
title = "{IDeSyDe}: Systematic Design Space Exploration via
Design Space Identification",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "87:1--87:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3647640",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3647640",
abstract = "Design space exploration (DSE) is a key activity in
embedded design processes, where a mapping between
applications and platforms that meets the process
design requirements must be found. Finding such
mappings is very challenging due to the complexity of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Yan:2024:MCT,
author = "Wenyan Yan and Dongsheng Wei and Bin Fu and Renfa Li
and Guoqi Xie",
title = "A Mixed-Criticality Traffic Scheduler with Mitigating
Congestion for {CAN}-to-{TSN} Gateway",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "88:1--88:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3656173",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3656173",
abstract = "The network architecture that Time-Sensitive
Networking (TSN) is used as the backbone network and
the Controller Area Network (CAN) serves as the
intra-domain network is considered as the CAN-TSN
interconnection network architecture, which has gained
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Kim:2024:AHC,
author = "Jiseung Kim and Hyunsei Lee and Mohsen Imani and
Yeseong Kim",
title = "Advancing Hyperdimensional Computing Based on
Trainable Encoding and Adaptive Training for Efficient
and Accurate Learning",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "89:1--89:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3665891",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3665891",
abstract = "Hyperdimensional computing (HDC) is a computing
paradigm inspired by the mechanisms of human memory,
characterizing data through high-dimensional vector
representations, known as hypervectors. Recent
advancements in HDC have explored its potential as a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "89",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Liu:2024:CST,
author = "Mengyu Liu and Lin Zhang and Weizhe Xu and Shixiong
Jiang and Fanxin Kong",
title = "{CPSim}: Simulation Toolbox for Security Problems in
Cyber-Physical Systems",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "90:1--90:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3674904",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3674904",
abstract = "There are various applications of Cyber-Physical
systems (CPSs) that are life-critical where failure or
malfunction can result in significant harm to human
life, the environment, or substantial economic loss.
Therefore, it is important to ensure their \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "90",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Baroffio:2024:ECT,
author = "Davide Baroffio and Federico Reghenzani and William
Fornaciari",
title = "Enhanced Compiler Technology for Software-based
Hardware Fault Detection",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "91:1--91:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3660524",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3660524",
abstract = "Software-Implemented Hardware Fault Tolerance (SIHFT)
is a modern approach for tackling random hardware
faults of dependable systems employing solely software
solutions. This work extends an automatic
compiler-based SIHFT hardening tool called ASPIS,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "91",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{K:2024:FFA,
author = "Keerthi K. and Chester Rebeiro",
title = "{FortiFix}: a Fault Attack Aware Compiler Framework
for Crypto Implementations",
journal = j-TODAES,
volume = "29",
number = "5",
pages = "92:1--92:??",
month = sep,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3650029",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Sep 30 08:40:18 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3650029",
abstract = "Fault attacks are one of the most powerful forms of
cryptanalytic attack on embedded systems, which can
corrupt a cipher's operations leading to a breach of
confidentiality and integrity. A single precisely
injected fault during the execution of a cipher can be
exploited to retrieve the secret key in a few
milliseconds. Naive countermeasures introduced into
implementation can lead to huge overheads, making them
unusable in resource-constraint environments. However,
optimized countermeasures require significant
knowledge, not only about the attack but also on the
cryptographic properties of the cipher, the program
structure, and the underlying hardware architecture.
This makes the protection against fault attacks tedious
and error prone.\par
In this article, we introduce FortiFix, the first
automated compiler framework that can detect and patch
fault exploitable regions in a block cipher
implementation. The framework has two phases. The
pre-compilation phase identifies regions in the source
code of a block cipher that are vulnerable to fault
attacks. The second phase is incorporated as
transformation passes in the LLVM compiler to find
exploitable instructions, quantify the impact of a
fault on these instructions, and finally insert
appropriate countermeasures based on user-defined
security requirements. As a proof of concept, we have
evaluated two block cipher implementations, AES-128 and
CLEFIA-128, on three different hardware platforms:
MSP430 (16-bit), ARM (32-bit), and RISCV (32-bit).",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "92",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sun:2024:EPP,
author = "Xiaoyu Sun and Xiaochen Peng and Sai Qian Zhang and
Jorge Gomez and Win-San Khwa and Syed Shakib Sarwar and
Ziyun Li and Weidong Cao and Zhao Wang and Chiao Liu
and Meng-Fan Chang and Barbara {De Salvo} and Kerem
Akarvardar and H.-S. Philip Wong",
title = "Estimating Power, Performance, and Area for On-Sensor
Deployment of {AR\slash VR} Workloads Using an
Analytical Framework",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "93:1--93:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3670404",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3670404",
abstract = "Augmented Reality and Virtual Reality have emerged as
the next frontier of intelligent image sensors and
computer systems. In these systems, 3D die stacking
stands out as a compelling solution, enabling in situ
processing capability of the sensory data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "93",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Pereira:2024:MSS,
author = "Danny Pereira and Sumana Ghosh and Soumyajit Dey",
title = "Multi-Stream Scheduling of Inference Pipelines on Edge
Devices --- a {DRL} Approach",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "94:1--94:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3677378",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3677378",
abstract = "Low-power edge devices equipped with Graphics
Processing Units (GPUs) are a popular target platform
for real-time scheduling of inference pipelines. Such
application-architecture combinations are popular in
Advanced Driver-assistance Systems for aiding in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "94",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Wang:2024:EAS,
author = "Hongfei Wang and Wei Liu and Wenjie Cai and Yunxiao Lu
and Caixue Wan",
title = "Efficient Attacks on Strong {PUFs} via Covariance and
{Boolean} Modeling",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "95:1--95:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3687469",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3687469",
abstract = "The physical unclonable function (PUF) is a widely
used hardware security primitive. Before hacking into a
PUF-protected system, intruders typically initiate
attacks on the PUF as the first step. Many strong PUF
designs have been proposed to thwart non-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "95",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Zhou:2024:RNI,
author = "Chencan Zhou and Yang Cao and Quan Shi and Luxin Wang
and Xiaoqing Wen",
title = "A Robust {Newton} Iteration Method for
Mixed-Cell-Height Circuit Legalization Under Technology
and Region Constraints",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "96:1--96:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3689436",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3689436",
abstract = "The evolution of advanced technology nodes has
prompted a shift toward mixed-cell-height circuit
design, while the introduction of technology and fence
region constraints further increases the complexity of
placement. In this article, we innovatively \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "96",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Nath:2024:AAL,
author = "Arijit Nath and Hemangee K. Kapoor",
title = "{AmLuCEP}: Amalgamating {LUT}-based Compression and
Adaptive Encoding Assisted Block Placement To Improve
Lifetime of {PCM}-based Main Memories",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "97:1--97:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3689334",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3689334",
abstract = "With the rising demands for high capacity memory and
poor scalability of the existing DRAM-based main
memories, the emerging Non-volatile memories captures
higher attention due to their high density and low
leakage power consumption. However, the possible
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "97",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Chen:2024:ATP,
author = "Kean Chen and Mingsheng Ying",
title = "Automatic Test Pattern Generation for Robust Quantum
Circuit Testing",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "98:1--98:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3689333",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3689333",
abstract = "Quantum circuit testing is essential for detecting
potential faults in realistic quantum devices, while
the testing process itself also suffers from the
inexactness and unreliability of quantum operations.
This article alleviates the issue by proposing a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "98",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Tseng:2024:BBA,
author = "Wei-Hsiang Tseng and Yao-Wen Chang",
title = "A Bridge-based Algorithm for Simultaneous Primal and
Dual Defects Compression on Topologically
Quantum-error-corrected Circuits",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "99:1--99:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3695252",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3695252",
abstract = "Topological quantum error correction (TQEC) using the
surface code is among the most promising techniques for
fault-tolerant quantum circuits. The required resource
of a TQEC circuit can be modeled as a space-time volume
of a three-dimensional diagram by \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "99",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Li:2024:ZFR,
author = "Zhuoran Li and Danella Zhao",
title = "{ZeroD-fender}: a Resource-aware {IoT} Malware
Detection Engine via Fine-grained Side-channel
Analysis",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "100:1--100:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3687482",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3687482",
abstract = "In early 2023, cyberattacks experienced a significant
rise due to unknown (zero-day) malware targeting
Internet of Things (IoT) devices. To tackle the
challenge of zero-day detection within a highly
resource-constrained IoT environment, we propose a
novel \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "100",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Amuru:2024:TLE,
author = "Deepthi Amuru and Raja Mavullu Vechalapu and Zia
Abbas",
title = "Transfer Learning Enabled Modeling Paradigm for
{PVT}-aware Circuit Performance Estimation",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "101:1--101:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3689435",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3689435",
abstract = "Designing robust performance models for modern complex
digital circuits in the face of rapidly accelerating
process variations is a critical yet demanding task.
This paper introduces an efficient statistical
performance modeling approach for VLSI digital
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "101",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Fang:2024:PFS,
author = "Wei-Kai Fang and Wai-Kei Mak",
title = "Placement Flow Study and Detailed Placement for
Hybrid-Row-Height Designs",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "102:1--102:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3690385",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3690385",
abstract = "At the 3 nm node, a hybrid-row-height design paradigm
has emerged for better power efficiency and performance
optimization. A diverse cell library that includes
multiple variants of a cell with different fin counts
is available. Instead of using cells \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "102",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Gao:2024:MRM,
author = "Zhenyi Gao and Sheqin Dong and Zhicong Tang and
Wenjian Yu",
title = "{MCMCF-Router}: Multi-capacity Ordered Escape Routing
Algorithms for Grid\slash Staggered Pin Array",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "103:1--103:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3695253",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3695253",
abstract = "Ordered escape routing (OER), which means that the
pins need to be routed to the boundary of a pin array
in a given order, is an important research topic in PCB
design. Although OER has been widely investigated, most
works assume that the routing capacity \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "103",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Sanjaya:2024:ABV,
author = "Sahan Sanjaya and Hasini Witharana and Prabhat
Mishra",
title = "Assertion-Based Validation using Clustering and
Dynamic Refinement of Hardware Checkers",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "104:1--104:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3696108",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3696108",
abstract = "Post-silicon validation is a vital step in
System-on-Chip (SoC) design cycle. A major challenge in
post-silicon validation is the limited observability of
internal signal states using trace buffers. Hardware
assertions are promising to improve \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "104",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Lin:2024:EMD,
author = "Jingui Lin and Wenxiong Lin and Shiyan Liang and Peng
Gao and Yan Xing and Tingting Wu and Xiaoming Xiong and
Shuting Cai",
title = "An Efficient Method of {DRC} Violation Prediction with
a Serial Deep Learning Model",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "105:1--105:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3694968",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3694968",
abstract = "In VLSI design, the utilization of Design Rule Check
(DRC) tools in the early stage is crucial for
predicting and resolving violations, thereby expediting
the physical design process. In our study, we present
an efficient model that predicts DRC \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "105",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}
@Article{Dahiya:2024:RMC,
author = "Ayush Dahiya and Poornima Mittal and Rajesh Rohilla",
title = "Realizing In-Memory Computing using Reliable
Differential {8T} {SRAM} for Improved Latency",
journal = j-TODAES,
volume = "29",
number = "6",
pages = "106:1--106:??",
month = nov,
year = "2024",
CODEN = "ATASFO",
DOI = "https://doi.org/10.1145/3696666",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Mon Oct 21 06:29:26 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/todaes.bib",
URL = "https://dl.acm.org/doi/10.1145/3696666",
abstract = "Traditional von Neumann computing architectures suffer
from high energy and lower speed as compared to the
requirements of modern applications like those required
in neural network accelerators. A modified differential
eight transistor (8$^+$T) static random \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Transact. Des. Automat. Electron. Syst.",
articleno = "106",
fjournal = "ACM Transactions on Design Automation of Electronic
Systems",
journal-URL = "https://dl.acm.org/loi/todaes",
}