%%% TeX code written ahead of the bibliography: reads bibnames.sty and
%%% supplies a typewriter-font fallback for \pkg when it is undefined.
@Preamble{
    "\input bibnames.sty" #
    "\ifx \undefined \pkg \def \pkg #1{{{\tt #1}}} \fi"
}
%%% Contact details substituted wherever an entry's acknowledgement
%%% field refers to ack-nhfb.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}
%%% Journal-name macro used by the journal field of the article entries.
@String{j-TECS                  = "ACM Transactions on Embedded Computing
                                  Systems"}
@Article{Wolf:2002:III,
  author =       "Wayne Wolf",
  title =        "Introduction to the inaugural issue",
  journal =      j-TECS,
  volume =       "1",
  number =       "1",
  pages =        "1--1",
  month =        nov,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:40 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Jacob:2002:ITS,
  author =       "Bruce Jacob and Shuvra Bhattacharyya",
  title =        "Introduction to the two special issues on memory",
  journal =      j-TECS,
  volume =       "1",
  number =       "1",
  pages =        "2--5",
  month =        nov,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:40 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Avissar:2002:OMA,
  author =       "Oren Avissar and Rajeev Barua and Dave Stewart",
  title =        "An optimal memory allocation scheme for
                 scratch-pad-based embedded systems",
  journal =      j-TECS,
  volume =       "1",
  number =       "1",
  pages =        "6--26",
  month =        nov,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:40 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2002:TGC,
  author =       "G. Chen and R. Shetty and M. Kandemir and N.
                 Vijaykrishnan and M. J. Irwin and M. Wolczko",
  title =        "Tuning garbage collection for reducing memory system
                 energy in an embedded {Java} environment",
  journal =      j-TECS,
  volume =       "1",
  number =       "1",
  pages =        "27--55",
  month =        nov,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:40 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2002:AAI,
  author =       "Jung-Hoon Lee and Shin-Dug Kim and Charles Weems",
  title =        "Application-adaptive intelligent cache memory system",
  journal =      j-TECS,
  volume =       "1",
  number =       "1",
  pages =        "56--78",
  month =        nov,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:40 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Yang:2002:FVL,
  author =       "Jun Yang and Rajiv Gupta",
  title =        "Frequent value locality and its applications",
  journal =      j-TECS,
  volume =       "1",
  number =       "1",
  pages =        "79--105",
  month =        nov,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:40 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ykman-Couvreur:2002:SLE,
  author =       "Ch. Ykman-Couvreur and J. Lambrecht and A. {Van Der
                 Togt} and F. Catthoor and H. {De Man}",
  title =        "System-level exploration of association table
                 implementations in telecom network applications",
  journal =      j-TECS,
  volume =       "1",
  number =       "1",
  pages =        "106--140",
  month =        nov,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:40 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Jacob:2003:ITS,
  author =       "Bruce Jacob and Shuvra Bhattacharyya",
  title =        "Introduction to the two special issues on memory",
  journal =      j-TECS,
  volume =       "2",
  number =       "1",
  pages =        "1--4",
  month =        feb,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Benini:2003:EAD,
  author =       "Luca Benini and Alberto Macii and Massimo Poncino",
  title =        "Energy-aware design of embedded memories: a survey of
                 technologies, architectures, and optimization
                 techniques",
  journal =      j-TECS,
  volume =       "2",
  number =       "1",
  pages =        "5--32",
  month =        feb,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Grun:2003:APB,
  author =       "Peter Grun and Nikil Dutt and Alex Nicolau",
  title =        "Access pattern-based memory and connectivity
                 architecture exploration",
  journal =      j-TECS,
  volume =       "2",
  number =       "1",
  pages =        "33--73",
  month =        feb,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Qu:2003:SSS,
  author =       "Gang Qu and Miodrag Potkonjak",
  title =        "System synthesis of synchronous multimedia
                 applications",
  journal =      j-TECS,
  volume =       "2",
  number =       "1",
  pages =        "74--97",
  month =        feb,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Shim:2003:LEC,
  author =       "Hojun Shim and Yongsoo Joo and Yongseok Choi and Hyung
                 Gyu Lee and Naehyuck Chang",
  title =        "Low-energy off-chip {SDRAM} memory systems for
                 embedded applications",
  journal =      j-TECS,
  volume =       "2",
  number =       "1",
  pages =        "98--130",
  month =        feb,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% NOTE(review): the first author is recorded as "Gao Guang", which BibTeX
%%% reads as family name "Guang"; this may be an inverted name (family
%%% name "Gao") -- confirm against the publisher record before changing.
@Article{Guang:2003:SIC,
  author =       "Gao Guang and Trevor Mudge",
  title =        "Special issue on compilers, architecture, and
                 synthesis for embedded systems",
  journal =      j-TECS,
  volume =       "2",
  number =       "2",
  pages =        "131--131",
  month =        may,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Franke:2003:ARH,
  author =       "Bj{\"o}rn Franke and Michael O'Boyle",
  title =        "Array recovery and high-level transformations for
                 {DSP} applications",
  journal =      j-TECS,
  volume =       "2",
  number =       "2",
  pages =        "132--162",
  month =        may,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2003:PIC,
  author =       "Soontae Kim and N. Vijaykrishnan and Mahmut Kandemir
                 and Anand Sivasubramaniam and Mary Jane Irwin",
  title =        "Partitioned instruction cache architecture for energy
                 efficiency",
  journal =      j-TECS,
  volume =       "2",
  number =       "2",
  pages =        "163--185",
  month =        may,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Rabbah:2003:DRD,
  author =       "Rodric M. Rabbah and Krishna V. Palem",
  title =        "Data remapping for design space optimization of
                 embedded memory systems",
  journal =      j-TECS,
  volume =       "2",
  number =       "2",
  pages =        "186--218",
  month =        may,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhao:2003:SRM,
  author =       "Qin Zhao and Bart Mesman and Twan Basten",
  title =        "Static resource models for code-size efficient
                 embedded processors",
  journal =      j-TECS,
  volume =       "2",
  number =       "2",
  pages =        "219--250",
  month =        may,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Jacome:2003:SIP,
  author =       "Margarida Jacome and Francky Catthoor",
  title =        "Special issue on power-aware embedded computing",
  journal =      j-TECS,
  volume =       "2",
  number =       "3",
  pages =        "251--254",
  month =        aug,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:42 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Martin:2003:CSS,
  author =       "Thomas L. Martin and Daniel P. Siewiorek and Asim
                 Smailagic and Matthew Bosworth and Matthew Ettus and
                 Jolin Warren",
  title =        "A case study of a system-level approach to power-aware
                 computing",
  journal =      j-TECS,
  volume =       "2",
  number =       "3",
  pages =        "255--276",
  month =        aug,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:42 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Rakhmatov:2003:EMB,
  author =       "Daler Rakhmatov and Sarma Vrudhula",
  title =        "Energy management for battery-powered embedded
                 systems",
  journal =      j-TECS,
  volume =       "2",
  number =       "3",
  pages =        "277--324",
  month =        aug,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:42 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Irani:2003:OSD,
  author =       "Sandy Irani and Sandeep Shukla and Rajesh Gupta",
  title =        "Online strategies for dynamic power management in
                 systems with multiple power-saving states",
  journal =      j-TECS,
  volume =       "2",
  number =       "3",
  pages =        "325--346",
  month =        aug,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:42 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhou:2003:AMC,
  author =       "Huiyang Zhou and Mark C. Toburen and Eric Rotenberg
                 and Thomas M. Conte",
  title =        "Adaptive mode control: a static-power-efficient cache
                 design",
  journal =      j-TECS,
  volume =       "2",
  number =       "3",
  pages =        "347--372",
  month =        aug,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:42 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Unsal:2003:CCC,
  author =       "Osman S. Unsal and Raksit Ashok and Israel Koren and
                 C. Mani Krishna and Csaba Andras Moritz",
  title =        "{Cool-Cache}: a compiler-enabled energy efficient data
                 caching framework for embedded/multimedia processors",
  journal =      j-TECS,
  volume =       "2",
  number =       "3",
  pages =        "373--392",
  month =        aug,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:42 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Yun:2003:EOV,
  author =       "Han-Saem Yun and Jihong Kim",
  title =        "On energy-optimal voltage scheduling for
                 fixed-priority hard real-time systems",
  journal =      j-TECS,
  volume =       "2",
  number =       "3",
  pages =        "393--430",
  month =        aug,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:42 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Schurgers:2003:PME,
  author =       "Curt Schurgers and Vijay Raghunathan and Mani B.
                 Srivastava",
  title =        "Power management for energy-aware communication
                 systems",
  journal =      j-TECS,
  volume =       "2",
  number =       "3",
  pages =        "431--447",
  month =        aug,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:42 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Gordon-Ross:2003:TIC,
  author =       "Ann Gordon-Ross and Susan Cotterell and Frank Vahid",
  title =        "Tiny instruction caches for low power embedded
                 systems",
  journal =      j-TECS,
  volume =       "2",
  number =       "4",
  pages =        "449--481",
  month =        nov,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 22 17:52:29 MST 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Lin:2003:CMC,
  author =       "Kelvin Lin and Chung-Ping Chung and Jean Jyh-Jiun
                 Shann",
  title =        "Compressing {MIPS} code by multiple operand
                 dependencies",
  journal =      j-TECS,
  volume =       "2",
  number =       "4",
  pages =        "482--508",
  month =        nov,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 22 17:52:29 MST 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Musoll:2003:SRU,
  author =       "Enric Musoll",
  title =        "Speculating to reduce unnecessary power consumption",
  journal =      j-TECS,
  volume =       "2",
  number =       "4",
  pages =        "509--536",
  month =        nov,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 22 17:52:29 MST 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Rusu:2003:MRR,
  author =       "Cosmin Rusu and Rami Melhem and Daniel Moss{\'e}",
  title =        "Maximizing rewards for real-time applications with
                 energy constraints",
  journal =      j-TECS,
  volume =       "2",
  number =       "4",
  pages =        "537--559",
  month =        nov,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 22 17:52:29 MST 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Venkataramani:2003:ACC,
  author =       "Girish Venkataramani and Walid Najjar and Fadi Kurdahi
                 and Nader Bagherzadeh and Wim Bohm and Jeff Hammes",
  title =        "Automatic compilation to a coarse-grained
                 reconfigurable system-on-a-chip",
  journal =      j-TECS,
  volume =       "2",
  number =       "4",
  pages =        "560--589",
  month =        nov,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 22 17:52:29 MST 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhuge:2003:CSR,
  author =       "Qingfeng Zhuge and Bin Xiao and Edwin H.-M. Sha",
  title =        "Code size reduction technique and implementation for
                 software-pipelined {DSP} applications",
  journal =      j-TECS,
  volume =       "2",
  number =       "4",
  pages =        "590--613",
  month =        nov,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 22 17:52:29 MST 2003",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Gupta:2004:GES,
  author =       "Rajesh Gupta",
  title =        "Guest editorial: {Special} issue on networked embedded
                 systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Raghunathan:2004:EEW,
  author =       "Vijay Raghunathan and Saurabh Ganeriwal and Mani
                 Srivastava and Curt Schurgers",
  title =        "Energy efficient wireless packet scheduling and fair
                 queuing",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "3--23",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% Title is stored in sentence case like the other titles in this file;
%%% the braces keep the capital after the colon under recasing styles.
@Article{Bulusu:2004:SCL,
  author =       "Nirupama Bulusu and John Heidemann and Deborah Estrin
                 and Tommy Tran",
  title =        "Self-configuring localization systems: {Design} and
                 experimental evaluation",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "24--60",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zou:2004:SDT,
  author =       "Yi Zou and Krishnendu Chakrabarty",
  title =        "Sensor deployment and target localization in
                 distributed sensor networks",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "61--91",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Gebotys:2004:DSC,
  author =       "Catherine H. Gebotys",
  title =        "Design of secure cryptography against the threat of
                 power-attacks in {DSP}-embedded processors",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "92--113",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Mishra:2004:MVP,
  author =       "Prabhat Mishra and Nikil Dutt",
  title =        "Modeling and validation of pipeline specifications",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "114--139",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Mishra:2004:PMC,
  author =       "Prabhat Mishra and Mahesh Mamidipaka and Nikil Dutt",
  title =        "Processor-memory coexploration using an architecture
                 description language",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "140--162",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Naik:2004:CCS,
  author =       "Mayur Naik and Jens Palsberg",
  title =        "Compiling with code-size constraints",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "163--181",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Schmitz:2004:ISO,
  author =       "Marcus T. Schmitz and Bashir M. Al-Hashimi and Petru
                 Eles",
  title =        "Iterative schedule optimization for voltage scalable
                 distributed embedded systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "182--217",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Stitt:2004:ESS,
  author =       "Greg Stitt and Frank Vahid and Shawn Nematbakhsh",
  title =        "Energy savings and speedups from partitioning critical
                 software loops to hardware in embedded systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "218--232",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Lach:2004:ESI,
  author =       "John Lach and Kia Bazargan",
  title =        "Editorial: {Special} issue on dynamically adaptable
                 embedded systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "233--236",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ghiasi:2004:OAM,
  author =       "Soheil Ghiasi and Ani Nahapetian and Majid
                 Sarrafzadeh",
  title =        "An optimal algorithm for minimizing run-time
                 reconfiguration delay",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "237--256",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Robertson:2004:DFP,
  author =       "Ian Robertson and James Irvine",
  title =        "A design flow for partially reconfigurable hardware",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "257--283",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Mejia-Alvarez:2004:ASS,
  author =       "Pedro Mejia-Alvarez and Eugene Levner and Daniel
                 Moss{\'e}",
  title =        "Adaptive scheduling server for power-aware real-time
                 tasks",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "284--306",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2004:BAP,
  author =       "Fan Zhang and Samuel T. Chanson",
  title =        "Blocking-aware processor voltage scheduling for
                 real-time tasks",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "307--335",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2004:DAF,
  author =       "Ying Zhang and Krishnendu Chakrabarty",
  title =        "Dynamic adaptation for fault tolerance and power
                 management in embedded real-time systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "336--360",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2004:DDR,
  author =       "Zhining Huang and Sharad Malik and Nahri Moreano and
                 Guido Araujo",
  title =        "The design of dynamically reconfigurable datapath
                 coprocessors",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "361--384",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Noguera:2004:MRA,
  author          = {Juanjo Noguera and Rosa M. Badia},
  title           = {Multitasking on reconfigurable architectures:
                     microarchitecture support and dynamic scheduling},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {385--406},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Zhang:2004:STC,
  author          = {Chuanjun Zhang and Frank Vahid and Roman Lysecky},
  title           = {A self-tuning cache architecture for embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {407--425},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{He:2004:AAA,
  author          = {Tian He and Brian M. Blum and John A. Stankovic and Tarek Abdelzaher},
  title           = {{AIDA}: {Adaptive} application-independent data aggregation
                     in wireless sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {426--457},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Serpanos:2004:GES,
  author          = {Dimitrios N. Serpanos and Haris Lekatsas},
  title           = {Guest editorial: {Special} issue on embedded systems and security},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {459--460},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Ravi:2004:SES,
  author          = {Srivaths Ravi and Anand Raghunathan and
                     Paul Kocher and Sunil Hattangady},
  title           = {Security in embedded systems: {Design} challenges},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {461--491},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Coron:2004:SSL,
  author          = {Jean-Sebastien Coron and David Naccache and Paul Kocher},
  title           = {Statistics and secret leakage},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {492--508},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Wollinger:2004:EHC,
  author          = {Thomas Wollinger and Jan Pelzl and Volker Wittelsberger and
                     Christof Paar and G{\"o}kay Saldamli and
                     {\c{C}}etin K. Ko{\c{c}}},
  title           = {Elliptic and hyperelliptic curves on embedded {$ \mu $P}},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {509--533},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Wollinger:2004:SFS,
  author          = {Thomas Wollinger and Jorge Guajardo and Christof Paar},
  title           = {Security on {FPGAs}: {State-of-the-art} implementations and attacks},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {534--574},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Reyhani-Masoleh:2004:EDS,
  author          = {Arash Reyhani-Masoleh and M. Anwar Hasan},
  title           = {Efficient digit-serial normal basis multipliers
                     over binary extension fields},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {575--592},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Reyhani-Masoleh:2004:TFT,
  author          = {Arash Reyhani-Masoleh and M. Anwar Hasan},
  title           = {Towards fault-tolerant cryptographic computations over finite fields},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {593--613},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Liu:2004:FSM,
  author          = {Rong-Tai Liu and Nen-Fu Huang and Chih-Hao Chen and Chia-Nan Kao},
  title           = {A fast string-matching algorithm for network processor-based
                     intrusion detection system},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {614--633},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Park:2004:LLS,
  author          = {Taejoon Park and Kang G. Shin},
  title           = {{LiSP}: a lightweight security protocol for wireless sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {634--660},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Harkin:2004:MOR,
  author          = {J. Harkin and T. M. McGinnity and L. P. Maguire},
  title           = {Modeling and optimizing run-time reconfiguration
                     using evolutionary computation},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {661--685},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Im:2004:DVS,
  author          = {Chaeseok Im and Soonhoi Ha and Huiseok Kim},
  title           = {Dynamic voltage scheduling with buffers
                     in low-power multimedia applications},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {686--705},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Manolache:2004:SAA,
  author          = {Sorin Manolache and Petru Eles and Zebo Peng},
  title           = {Schedulability analysis of applications
                     with stochastic task execution times},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {706--735},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Serpanos:2004:EHS,
  author          = {Dimitrios N. Serpanos and Poluxeni Mountrouidou and Maria Gamvrili},
  title           = {Evaluation of hardware and software schedulers for embedded switches},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {736--759},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Lanotte:2004:IFH,
  author          = {Ruggero Lanotte and Andrea Maggiolo-Schettini and Simone Tini},
  title           = {Information flow in hybrid systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {760--799},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Liu:2004:MBA,
  author          = {Donggang Liu and Peng Ning},
  title           = {Multilevel {$ \mu $TESLA}: {Broadcast} authentication
                     for distributed sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {800--836},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Chang:2004:RTG,
  author          = {Li-Pin Chang and Tei-Wei Kuo and Shi-Wu Lo},
  title           = {Real-time garbage collection for flash-memory storage systems
                     of real-time embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {837--863},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Mueller:2005:ISI,
  author          = {Frank Mueller and Per Stenstr{\"o}m},
  title           = {Introduction to the special issue},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {1--2},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Krishnaswamy:2005:DCB,
  author          = {Arvind Krishnaswamy and Rajiv Gupta},
  title           = {Dynamic coalescing for 16-bit instructions},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {3--37},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Corliss:2005:IED,
  author          = {Marc L. Corliss and E. Christopher Lewis and Amir Roth},
  title           = {The implementation and evaluation of dynamic code decompression
                     using {DISE}},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {38--72},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Dhurjati:2005:MSG,
  author          = {Dinakar Dhurjati and Sumant Kowshik and Vikram Adve and Chris Lattner},
  title           = {Memory safety without garbage collection for embedded applications},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {73--111},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Pop:2005:SDF,
  author          = {Paul Pop and Petru Eles and Zebo Peng},
  title           = {Schedulability-driven frame packing
                     for multicluster distributed embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {112--140},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Swaminathan:2005:PBE,
  author          = {Vishnu Swaminathan and Krishnendu Chakrabarty},
  title           = {Pruning-based, energy-optimal, deterministic {I/O} device
                     scheduling for hard real-time systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {141--167},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Chiou:2005:SAS,
  author          = {Lih-yih Chiou and Swarup Bhunia and Kaushik Roy},
  title           = {Synthesis of application-specific highly efficient
                     multi-mode cores for embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {168--188},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Zambreno:2005:SOA,
  author          = {Joseph Zambreno and Alok Choudhary and Rahul Simha and
                     Bhagi Narahari and Nasir Memon},
  title           = {{SAFE-OPS}: an approach to embedded software security},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {189--210},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Kwon:2005:OVA,
  author          = {Woo-Cheol Kwon and Taewhan Kim},
  title           = {Optimal voltage allocation techniques
                     for dynamically variable voltage processors},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {211--230},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Tan:2005:EME,
  author          = {T. K. Tan and A. Raghunathan and N. K. Jha},
  title           = {Energy macromodeling of embedded operating systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {231--254},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Shukla:2005:GES,
  author          = {Sandeep K. Shukla and Jean-Pierre Talpin},
  title           = {Guest editorial: {Special} issue on models and methodologies
                     for co-design of embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {225--227},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Cachera:2005:VSP,
  author          = {David Cachera and Katell Morin-Allory},
  title           = {Verification of safety properties for parameterized regular systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {228--266},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Chouali:2005:PPM,
  author          = {S. Chouali and J. Julliand and P.-A. Masson and F. Bellegarde},
  title           = {{PLTL}-partitioned model checking for reactive systems
                     under fairness assumptions},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {267--301},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Gardner:2005:CCS,
  author          = {William B. Gardner},
  title           = {Converging {CSP} specifications and {C++} programming
                     via selective formalism},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {302--330},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Ziller:2005:CSS,
  author          = {Roberto Ziller and Klaus Schneider},
  title           = {Combining supervisor synthesis and model checking},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {331--362},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Zhang:2005:HCC,
  author          = {Chuanjun Zhang and Frank Vahid and Walid Najjar},
  title           = {A highly configurable cache for low energy embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {363--387},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Kadayif:2005:DSO,
  author          = {I. Kadayif and M. Kandemir},
  title           = {Data space-oriented tiling for enhancing locality},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {388--414},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Muresan:2005:ICM,
  author          = {Radu Muresan and Catherine Gebotys},
  title           = {Instantaneous current modeling in a complex {VLIW} processor core},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {415--451},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Petrov:2005:RCF,
  author          = {Peter Petrov and Alex Orailoglu},
  title           = {A reprogrammable customization framework for efficient
                     branch resolution in embedded processors},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {452--468},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Burns:2005:E,
  author          = {Alan Burns},
  title           = {Editorial},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {469--471},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Sangiovanni-Vincentelli:2005:OES,
  author          = {Alberto L. Sangiovanni-Vincentelli and Alessandro Pinto},
  title           = {An overview of embedded system design education at {Berkeley}},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {472--499},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Koopman:2005:UES,
  author          = {Philip Koopman and Howie Choset and Rajeev Gandhi and
                     Bruce Krogh and Diana Marculescu and Priya Narasimhan and
                     Joann M. Paul and Ragunathan Rajkumar and Daniel Siewiorek and
                     Asim Smailagic and Peter Steenkiste and Donald E. Thomas and
                     Chenxi Wang},
  title           = {Undergraduate embedded system education at {Carnegie Mellon}},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {500--528},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Verbauwhede:2005:SES,
  author          = {Ingrid Verbauwhede and Patrick Schaumont},
  title           = {Skiing the embedded systems mountain},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {529--548},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Sztipanovits:2005:IES,
  author          = {Janos Sztipanovits and Gautam Biswas and Ken Frampton and
                     Aniruddha Gokhale and Larry Howard and Gabor Karsai and
                     T. John Koo and Xenofon Koutsoukos and Douglas C. Schmidt},
  title           = {Introducing embedded software and systems education and
                     advanced learning technology in an engineering curriculum},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {549--568},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Seviora:2005:CES,
  author          = {Rudolph E. Seviora},
  title           = {A curriculum for embedded system engineering},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {569--586},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Caspi:2005:GGC,
author = "P. Caspi and A. Sangiovanni-Vincentelli and L. Almeida
and A. Benveniste and B. Bouyssounouse and G. Buttazzo
and I. Crnkovic and W. Damm and J. Engblom and G.
Fohler and M. Garcia-Valls and H. Kopetz and Y.
Lakhnech and F. Laroussinie and L. Lavagno and G.
Lipari and F. Maraninchi and Ph. Peti and J. de la
Puente and N. Scaife and J. Sifakis and R. de Simone
and M. T{\"o}rngren and P. Ver{\'\i}ssimo and A. J.
Wellings and R. Wilhelm and T. Willemse and W. Yi",
title = "Guidelines for a graduate curriculum on embedded
software and systems",
journal = j-TECS,
volume = "4",
number = "3",
pages = "587--611",
month = aug,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Sep 17 15:05:12 MDT 2005",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2005:ESC,
  author          = {Tai-Yi Huang and Chung-Ta King and
                     Youn-Long Steve Lin and Yin-Tsung Hwang},
  title           = {The embedded software consortium of {Taiwan}},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {612--632},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Grimheden:2005:WES,
  author          = {Martin Grimheden and Martin T{\"o}rngren},
  title           = {What is embedded systems and how should it be taught?---results
                     from a didactic analysis},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {633--651},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Zhang:2005:RDC,
  author          = {Wei Zhang and Mahmut Kandemir and Mustafa Karakoy and Guangyu Chen},
  title           = {Reducing data cache leakage energy using a compiler-based approach},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {652--678},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Kim:2005:DDC,
author = "Hyung Seok Kim and Tarek F. Abdelzaher and Wook Hyun
Kwon",
title = "Dynamic delay-constrained minimum-energy dissemination
in wireless sensor networks",
journal = j-TECS,
volume = "4",
number = "3",
pages = "679--706",
month = aug,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Sep 17 15:05:12 MDT 2005",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Alur:2005:P,
author = "Rajeev Alur and Insup Lee",
title = "Preface",
journal = j-TECS,
volume = "4",
number = "4",
pages = "707--707",
month = nov,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tardieu:2005:LE,
author = "Olivier Tardieu and Robert de Simone",
title = "Loops in {ESTEREL}",
journal = j-TECS,
volume = "4",
number = "4",
pages = "708--750",
month = nov,
year = "2005",
CODEN = "????",
DOI = "https://doi.org/10.1145/1113830.1113832",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Regehr:2005:ESO,
author = "John Regehr and Alastair Reid and Kirk Webb",
title = "Eliminating stack overflow by abstract
interpretation",
journal = j-TECS,
volume = "4",
number = "4",
pages = "751--778",
month = nov,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tripakis:2005:TDT,
author = "Stavros Tripakis and Christos Sofronis and Paul Caspi
and Adrian Curic",
title = "Translating discrete-time {Simulink} to {Lustre}",
journal = j-TECS,
volume = "4",
number = "4",
pages = "779--818",
month = nov,
year = "2005",
CODEN = "????",
DOI = "https://doi.org/10.1145/1113830.1113834",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kadayif:2005:CDH,
author = "I. Kadayif and M. Kandemir and G. Chen and N.
Vijaykrishnan and M. J. Irwin and A. Sivasubramaniam",
title = "Compiler-directed high-level energy estimation and
optimization",
journal = j-TECS,
volume = "4",
number = "4",
pages = "819--850",
month = nov,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hu:2005:ADR,
author = "J. Hu and M. Kandemir and N. Vijaykrishnan and M. J.
Irwin",
title = "Analyzing data reuse for cache reconfiguration",
journal = j-TECS,
volume = "4",
number = "4",
pages = "851--876",
month = nov,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{He:2005:RFL,
author = "Tian He and Chengdu Huang and Brian M. Blum and John
A. Stankovic and Tarek F. Abdelzaher",
title = "Range-free localization and its impact on large scale
sensor networks",
journal = j-TECS,
volume = "4",
number = "4",
pages = "877--906",
month = nov,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gaujal:2005:SPA,
author = "Bruno Gaujal and Nicolas Navet and Cormac Walsh",
title = "Shortest-path algorithms for real-time scheduling of
{FIFO} tasks with minimal energy use",
journal = j-TECS,
volume = "4",
number = "4",
pages = "907--933",
month = nov,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bartolini:2005:OIC,
author = "S. Bartolini and C. A. Prete",
title = "Optimizing instruction cache performance of embedded
systems",
journal = j-TECS,
volume = "4",
number = "4",
pages = "934--965",
month = nov,
year = "2005",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 16 10:59:18 MST 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2006:RDL,
author = "W. Zhang and Y.-F. Tsai and D. Duarte and N.
Vijaykrishnan and M. Kandemir and M. J. Irwin",
title = "Reducing dynamic and leakage energy in {VLIW}
architectures",
journal = j-TECS,
volume = "5",
number = "1",
pages = "1--28",
month = feb,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu May 18 08:17:05 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Coussy:2006:FMH,
author = "Philippe Coussy and Emmanuel Casseau and Pierre Bomel
and Adel Baganne and Eric Martin",
title = "A formal method for hardware {IP} design and
integration under {I/O} and timing constraints",
journal = j-TECS,
volume = "5",
number = "1",
pages = "29--53",
month = feb,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu May 18 08:17:05 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Varea:2006:DFN,
author = "Mauricio Varea and Bashir M. Al-Hashimi and Luis A.
Cort{\'e}s and Petru Eles and Zebo Peng",
title = "{Dual Flow Nets}: {Modeling} the control\slash
data-flow relation in embedded systems",
journal = j-TECS,
volume = "5",
number = "1",
pages = "54--81",
month = feb,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu May 18 08:17:05 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{AbouGhazaleh:2006:COS,
author = "Nevine AbouGhazaleh and Daniel Moss{\'e} and Bruce R.
Childers and Rami Melhem",
title = "Collaborative operating system and compiler power
management for real-time applications",
journal = j-TECS,
volume = "5",
number = "1",
pages = "82--115",
month = feb,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu May 18 08:17:05 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dean:2006:STI,
author = "Alexander G. Dean",
title = "Software thread integration for embedded system
display applications",
journal = j-TECS,
volume = "5",
number = "1",
pages = "116--151",
month = feb,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu May 18 08:17:05 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Alur:2006:PAR,
author = "Rajeev Alur and Thao Dang and Franjo
Ivan{\v{c}}i{\'c}",
title = "Predicate abstraction for reachability analysis of
hybrid systems",
journal = j-TECS,
volume = "5",
number = "1",
pages = "152--199",
month = feb,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu May 18 08:17:05 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Seth:2006:FFA,
author = "Kiran Seth and Aravindh Anantaraman and Frank Mueller
and Eric Rotenberg",
title = "{FAST}: {Frequency-Aware Static Timing} analysis",
journal = j-TECS,
volume = "5",
number = "1",
pages = "200--224",
month = feb,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu May 18 08:17:05 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2006:RCS,
author = "G. Chen and M. Kandemir and M. J. Irwin and J.
Ramanujam",
title = "Reducing code size through address register
assignment",
journal = j-TECS,
volume = "5",
number = "1",
pages = "225--258",
month = feb,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu May 18 08:17:05 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jerraya:2006:GEC,
author = "Ahmed Jerraya and Trevor Mudge",
title = "Guest editorial: {Concurrent} hardware and software
design for multiprocessor {SoC}",
journal = j-TECS,
volume = "5",
number = "2",
pages = "259--262",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xu:2006:DMA,
author = "Jiang Xu and Wayne Wolf and Joerg Henkel and Srimat
Chakradhar",
title = "A design methodology for application-specific
networks-on-chip",
journal = j-TECS,
volume = "5",
number = "2",
pages = "263--280",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kangas:2006:UBM,
author = "Tero Kangas and Petri Kukkala and Heikki Orsila and
Erno Salminen and Marko H{\"a}nnik{\"a}inen and Timo D.
H{\"a}m{\"a}l{\"a}inen and Jouni Riihim{\"a}ki and
Kimmo Kuusilinna",
title = "{UML}-based multiprocessor {SoC} design framework",
journal = j-TECS,
volume = "5",
number = "2",
pages = "281--320",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hua:2006:EEE,
author = "Shaoxiong Hua and Gang Qu and Shuvra S.
Bhattacharyya",
title = "Energy-efficient embedded software implementation on
multiprocessor system-on-chip with multiple voltages",
journal = j-TECS,
volume = "5",
number = "2",
pages = "321--341",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hessel:2006:SRA,
author = "Fabiano Hessel and Vitor M. {Da Rosa} and Carlos
Eduardo Reif and C{\'e}sar Marcon and Tatiana {Gadelha
Serra Dos Santos}",
title = "Scheduling refinement in abstract {RTOS} models",
journal = j-TECS,
volume = "5",
number = "2",
pages = "342--354",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ou:2006:DSE,
author = "Jingzhao Ou and Viktor K. Prasanna",
title = "Design space exploration using arithmetic-level
hardware--software cosimulation for configurable
multiprocessor platforms",
journal = j-TECS,
volume = "5",
number = "2",
pages = "355--382",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Loghi:2006:CCT,
author = "Mirko Loghi and Massimo Poncino and Luca Benini",
title = "Cache coherence tradeoffs in shared-memory {MPSoCs}",
journal = j-TECS,
volume = "5",
number = "2",
pages = "383--407",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lapalme:2006:NEE,
author = "James Lapalme and El Mostapha Aboulhamid and Gabriela
Nicolescu",
title = "A new efficient {EDA} tool design methodology",
journal = j-TECS,
volume = "5",
number = "2",
pages = "408--430",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Reshadi:2006:RFI,
author = "Mehrdad Reshadi and Nikil Dutt and Prabhat Mishra",
title = "A retargetable framework for instruction-set
architecture simulation",
journal = j-TECS,
volume = "5",
number = "2",
pages = "431--452",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Memik:2006:ENP,
author = "Gokhan Memik and William H. Mangione-Smith",
title = "Evaluating {Network Processors} using {NetBench}",
journal = j-TECS,
volume = "5",
number = "2",
pages = "453--471",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Udayakumaran:2006:DAS,
author = "Sumesh Udayakumaran and Angel Dominguez and Rajeev
Barua",
title = "Dynamic allocation for scratch-pad memory using
compile-time decisions",
journal = j-TECS,
volume = "5",
number = "2",
pages = "472--511",
month = may,
year = "2006",
CODEN = "????",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Aug 23 05:26:43 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2006:EEU,
author = "Haisang Wu and Binoy Ravindran and E. Douglas Jensen
and Peng Li",
title = "Energy-efficient, utility accrual scheduling under
resource constraints for mobile embedded systems",
journal = j-TECS,
volume = "5",
number = "3",
pages = "513--542",
month = aug,
year = "2006",
CODEN = "????",
DOI = "https://doi.org/10.1145/1165780.1165781",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Oct 11 06:45:18 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present an energy-efficient, utility accrual,
real-time scheduling algorithm called ReUA. ReUA
considers an application model where activities are
subject to time/utility function time constraints,
mutual exclusion constraints on shared non-CPU
resources, and statistical performance requirements on
individual activity timeliness behavior. The algorithm
targets mobile embedded systems where {\em
system-level\/} energy consumption is also a major
concern. For such a model, we consider the scheduling
objectives of (1) satisfying the statistical
performance requirements and (2) maximizing the
system-level energy efficiency, while respecting
resource constraints. Since the problem is NP-hard,
ReUA allocates CPU cycles using statistical properties
of application cycle demands, and heuristically
computes schedules with a polynomial time cost. We
analytically establish several timeliness and
nontimeliness properties of the algorithm. Further, our
simulation experiments illustrate ReUA's effectiveness
and superiority.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Luo:2006:EEI,
author = "Liqian Luo and Tarek F. Abdelzaher and Tian He and
John A. Stankovic",
title = "{EnviroSuite}: an environmentally immersive
programming framework for sensor networks",
journal = j-TECS,
volume = "5",
number = "3",
pages = "543--576",
month = aug,
year = "2006",
CODEN = "????",
DOI = "https://doi.org/10.1145/1165780.1165782",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Oct 11 06:45:18 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Sensor networks open a new frontier for
embedded-distributed computing. Paradigms for sensor
network programming-in-the-large have been identified
as a significant challenge toward developing
large-scale applications. Classical programming
languages are too low-level. This paper presents the
design, implementation, and evaluation of EnviroSuite,
a programming framework that introduces a new paradigm,
called environmentally immersive programming, to
abstract distributed interactions with the environment.
Environmentally immersive programming refers to an
object-based programming model in which individual
objects represent physical elements in the external
environment. It allows the programmer to think directly
in terms of environmental abstractions. EnviroSuite
provides language primitives for environmentally
immersive programming that map transparently into a
support library of distributed algorithms for tracking
and environmental monitoring. We show how nesC code of
realistic applications is significantly simplified
using EnviroSuite and demonstrate the resulting system
performance on Mica2 and XSM platforms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gebotys:2006:SMC,
author = "Catherine H. Gebotys",
title = "A split-mask countermeasure for low-energy secure
embedded systems",
journal = j-TECS,
volume = "5",
number = "3",
pages = "577--612",
month = aug,
year = "2006",
CODEN = "????",
DOI = "https://doi.org/10.1145/1165780.1165783",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Oct 11 06:45:18 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Future wireless embedded devices will be increasingly
powerful, supporting many more applications, including
one of the most crucial---security. Although many
embedded devices offer more resistance to bus-probing
attacks because of their compact size, susceptibility
to power or electromagnetic analysis attacks must be
analyzed. This paper presents a new split-mask
countermeasure to thwart low-order differential power
analysis (DPA) and differential EM analysis (DEMA). For
the first time, real-power and EM measurements are used
to analyze the difficulty of launching new third-order
DPA and DEMA attacks on a popular low-energy 32-bit
embedded ARM processor. Results show that the new
split-mask countermeasure provides increased security
without large overheads of energy dissipation, compared
to previous research. With the emergence of security
applications in PDAs, cell phones, and other embedded
devices, low-energy countermeasures for resistance to
low-order DPA/DEMA is crucial for supporting future
enabled wireless internet.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhuang:2006:PLS,
author = "Xiaotong Zhuang and Santosh Pande",
title = "Parallelizing load\slash stores on dual-bank memory
embedded processors",
journal = j-TECS,
volume = "5",
number = "3",
pages = "613--657",
month = aug,
year = "2006",
CODEN = "????",
DOI = "https://doi.org/10.1145/1165780.1165784",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Oct 11 06:45:18 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many modern embedded processors such as DSPs support
partitioned memory banks (also called X--Y memory or
dual-bank memory) along with parallel load/store
instructions to achieve higher code density and
performance. In order to effectively utilize the
parallel load/store instructions, the compiler must
partition the memory-resident values and assign them to
X or Y bank. This paper gives a postregister allocation
solution to merge the generated load/store instructions
into their parallel counterparts. Simultaneously, our
framework performs allocation of values to X or Y
memory banks. We first remove as many load/stores and
register--register moves as possible through an
excellent iterated coalescing based register allocator
by Appel and George [1996]. We then attempt to
parallelize the generated load/stores using a multipass
approach. The basic phase of our approach attempts the
merger of load/stores without duplication and web
splitting. We model this problem as a graph-coloring
problem in which each value is colored as either X or
Y. We then construct a motion scheduling graph (MSG),
based on the range of motion for each load/store
instruction. MSG reflects potential instructions that
could be merged. We propose a notion of pseudofixed
boundaries so that the load/store movement is less
affected by register dependencies. We prove that the
coloring problem for MSG is NP-complete and solve it
with two different heuristic algorithms with different
complexity. We then propose a two-level iterative
process to attempt instruction duplication, variable
duplication, web splitting, and local conflict
elimination to effectively merge the remaining
load/stores. Finally, we clean up some multiple-aliased
load/stores. To improve the performance, we combine
profiling information with each stage coupled with some
modifications to the algorithm. We show that our
framework results in parallelization of a large number
of load/stores without much growth in data and code
segments. The average speedup for our optimization pass
reaches roughly 13\% if no profile information is
available and 17\% with profile information. The
average code and data segment growth is controlled
within 13\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jones:2006:RPW,
author = "Alex K. Jones and Raymond Hoare and Dara Kusic and
Gayatri Mehta and Josh Fazekas and John Foster",
title = "Reducing power while increasing performance with
{SuperCISC}",
journal = j-TECS,
volume = "5",
number = "3",
pages = "658--686",
month = aug,
year = "2006",
CODEN = "????",
DOI = "https://doi.org/10.1145/1165780.1165785",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Oct 11 06:45:18 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Multiprocessor Systems on Chips (MPSoCs) have become a
popular architectural technique to increase
performance. However, MPSoCs may lead to undesirable
power consumption characteristics for computing systems
that have strict power budgets, such as PDAs, mobile
phones, and notebook computers. This paper presents the
super-complex instruction-set computing (SuperCISC)
Embedded Processor Architecture and, in particular,
investigates performance and power consumption of this
device compared to traditional processor
architecture-based execution. SuperCISC is a
heterogeneous, multicore processor architecture
designed to exceed performance of traditional embedded
processors while maintaining a reduced power budget
compared to low-power embedded processors. At the heart
of the SuperCISC processor is a multicore VLIW (Very
Large Instruction Word) containing several homogeneous
execution cores/functional units. In addition, complex
and heterogeneous combinational hardware function cores
are tightly integrated to the core VLIW engine
providing an opportunity for improved performance and
reduced energy consumption. Our SuperCISC processor
core has been synthesized for both a 90-nm Stratix II
Field Programmable Gate Array (FPGA) and a 160-nm
standard cell Application-Specific Integrated Circuit
(ASIC) fabrication process from OKI, each operating at
approximately 167 MHz for the VLIW core. We examine
several reasons for speedup and power improvement
through the SuperCISC architecture, including
predicated control flow, cycle compression, and a
reduction in arithmetic power consumption, which we
call power compression. Finally, testing our SuperCISC
processor with multimedia and signal-processing
benchmarks, we show how the SuperCISC processor can
provide performance improvements ranging from 7X to
160X with an average of 60X, while also providing
orders of magnitude of power improvements for the
computational kernels. The power improvements for our
benchmark kernels range from just over 40X to over
400X, with an average savings exceeding 130X. By
combining these power and performance improvements, our
total energy improvements all exceed 1000X. As these
savings are limited to the computational kernels of the
applications, which often consume approximately 90\% of
the execution time, we expect our savings to approach
the ideal application improvement of 10X.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Girault:2006:ARD,
author = "Alain Girault and Xavier Nicollin and Marc Pouzet",
title = "Automatic rate desynchronization of embedded reactive
programs",
journal = j-TECS,
volume = "5",
number = "3",
pages = "687--717",
month = aug,
year = "2006",
CODEN = "????",
DOI = "https://doi.org/10.1145/1165780.1165786",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Oct 11 06:45:18 MDT 2006",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many embedded reactive programs perform computations
at different rates, while still requiring the overall
application to satisfy very tight temporal constraints.
We propose a method to automatically distribute
programs such that the obtained parts can be run at
different rates, which we call rate desynchronization.
We consider general programs whose control structure is
a finite state automaton and with a DAG of actions in
each state. The motivation is to take into account
long-duration tasks inside the programs: these are
tasks whose execution time is long compared to the
other computations in the application, and whose
maximal execution rate is known and bounded. Merely
scheduling such a long duration task at a slow rate
would not work since the whole program would be slowed
down if compiled into sequential code. It would thus be
impossible to meet the temporal constraints, unless
such long duration tasks could be desynchronized from
the remaining computations. This is precisely what our
method achieves: it distributes the initial program
into several parts, so that the parts performing the
slow computations can be run at an appropriate rate,
therefore not impairing the global reaction time of the
program. We present in detail our method, all the
involved algorithms, and a small running example. We
also compare our method with the related work.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Biswas:2006:MOP,
  author =       "Surupa Biswas and Thomas Carley and Matthew Simpson
                 and Bhuvan Middha and Rajeev Barua",
  title =        "Memory overflow protection for embedded systems using
                 run-time checks, reuse, and compression",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "719--752",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196637",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems usually lack virtual memory and are
                 vulnerable to memory overflow since they lack a
                 mechanism to detect overflow or use swap space
                 thereafter. We present a method to detect memory
                 overflows using compiler-inserted software run-time
                 checks. Its overheads in run-time and energy are 1.35
                 and 1.12\%, respectively. Detection of overflow allows
                 system-specific remedial action. We also present
                 techniques to grow the stack or heap segment after they
                 overflow, into previously unutilized space, such as
                 dead variables, free holes in the heap, and space freed
                 by compressing live variables. These may avoid the
                 out-of-memory error if the space recovered is enough to
                 complete execution. The reuse methods are able to grow
                 the stack or heap beyond its overflow by an amount that
                 varies widely by application---the amount of recovered
                 space ranges from 0.7 to 93.5\% of the combined stack
                 and heap size.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "data compression; heap overflow; out-of-memory errors;
                 reliability; reuse; run-time checks; stack overflow",
}
@Article{Higuera-Toledano:2006:HSD,
  author =       "M. Teresa Higuera-Toledano",
  title =        "Hardware support for detecting illegal references in a
                 multiapplication real-time {Java} environment",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "753--772",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196638",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Our objective is to adapt the Java memory management
                 to an embedded system, e.g., a wireless PDA executing
                 concurrent multimedia applications within a single JVM.
                 This paper provides software, and hardware-based
                 solutions detecting both illegal references across the
                 application memory spaces and dangling pointers within
                 an application space. We give an approach to
                 divide/share the memory among the applications
                 executing concurrently in the system. We introduce and
                 define application-specific memory, building upon the
                 real-time specification for Java (RTSJ) from the
                 real-time Java expert group. The memory model used in
                 RTSJ imposes strict rules for assignment between memory
                 areas, preventing the creation of dangling pointers,
                 and thus maintaining the pointer safety of Java. Our
                 implementation solution to ensure the checking of these
                 rules before each assignment inserts write barriers
                 that use a stack-based algorithm. This solution
                 adversely affects both the performance and
                 predictability of the RTSJ applications, which can be
                 improved by using an existing hardware support.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "garbage collection; memory management; write
                 barriers",
}
@Article{Winter:2006:TPC,
  author =       "Victor L. Winter and Jason Beranek and Fares Fraij and
                 Steve Roach and Greg Wickstrom",
  title =        "A transformational perspective into the core of an
                 abstract class loader for the {SSP}",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "773--818",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196639",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The SSP is a hardware implementation of a subset of
                 the JVM for use in high-consequence embedded
                 applications. In this context, a majority of the
                 activities belonging to class loading, as it is defined
                 in the specification of the JVM, can be performed
                 statically. Static class loading has the net result of
                 dramatically simplifying the design of the SSP, as well
                 as increasing its performance. Because of the high
                 consequence nature of its applications, strong evidence
                 must be provided that all aspects of the SSP have been
                 implemented correctly. This includes the class loader.
                 This article explores the possibility of formally
                 verifying a class loader for the SSP implemented in the
                 strategic programming language TL. Specifically, an
                 implementation of the core activities of an abstract
                 class loader is presented and its verification in ACL2
                 is considered.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "HATS; higher-order rewriting; SSP; strategic
                 programming; TL",
}
@Article{Kulkarni:2006:VVI,
  author =       "Prasad Kulkarni and Wankang Zhao and Stephen Hines and
                 David Whalley and Xin Yuan and Robert van Engelen and
                 Kyle Gallivan and Jason Hiser and Jack Davidson and
                 Baosheng Cai and Mark Bailey and Hwashin Moon and
                 Kyunghwan Cho and Yunheung Paek",
  title =        "{VISTA}: {VPO} interactive system for tuning
                 applications",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "819--863",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196640",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Software designers face many challenges when
                 developing applications for embedded systems. One major
                 challenge is meeting the conflicting constraints of
                 speed, code size, and power consumption. Embedded
                 application developers often resort to hand-coded
                 assembly language to meet these constraints since
                 traditional optimizing compiler technology is usually
                 of little help in addressing this challenge. The
                 results are software systems that are not portable,
                 less robust, and more costly to develop and maintain.
                 Another limitation is that compilers traditionally
                 apply the optimizations to a program in a fixed order.
                 However, it has long been known that a single ordering
                 of optimization phases will not produce the best code
                 for every application. In fact, the smallest unit of
                 compilation in most compilers is typically a function
                 and the programmer has no control over the code
                 improvement process other than setting flags to enable
                 or disable certain optimization phases. This paper
                 describes a new code improvement paradigm implemented
                 in a system called VISTA that can help achieve the
                 cost/performance trade-offs that embedded applications
                 demand. The VISTA system opens the code improvement
                 process and gives the application programmer, when
                 necessary, the ability to finely control it. VISTA also
                 provides support for finding effective sequences of
                 optimization phases. This support includes the ability
                 to interactively get static and dynamic performance
                 information, which can be used by the developer to
                 steer the code improvement process. This performance
                 information is also internally used by VISTA for
                 automatically selecting the best optimization sequence
                 from several attempted. One such feature is the use of
                 a genetic algorithm to search for the most efficient
                 sequence based on specified fitness criteria. We
                 include a number of experimental results that evaluate
                 the effectiveness of using a genetic algorithm in VISTA
                 to find effective optimization phase sequences.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "genetic algorithms; interactive compilation; phase
                 ordering; user-directed code improvement",
}
@Article{Ottoni:2006:OAU,
  author =       "Desiree Ottoni and Guilherme Ottoni and Guido Araujo
                 and Rainer Leupers",
  title =        "Offset assignment using simultaneous variable
                 coalescing",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "864--883",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196641",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The generation of efficient addressing code is a
                 central problem in compiling for processors with
                 restricted addressing modes, like digital signal
                 processors (DSPs). Offset assignment (OA) is the
                 problem of allocating scalar variables to memory, so as
                 to minimize the need of addressing instructions. This
                 problem is called simple offset assignment (SOA) when a
                 single address register is available, and general
                 offset assignment (GOA) when more address registers are
                 used. This paper shows how variables' liveness
                 information can be used to dramatically reduce the
                 addressing instructions required to access local
                 variables on the program stack. Two techniques that
                 make effective use of variable coalescing to solve SOA
                 and GOA are described, namely coalescing SOA (CSOA) and
                 coalescing GOA (CGOA). In addition, a thorough
                 comparison between these algorithms and others
                 described in the literature is presented. The
                 experimental results, when compiling MediaBench
                 benchmark programs with the LANCE compiler, reveal a
                 very significant improvement of the proposed techniques
                 over the other available solutions to the problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "address registers; autoincrement addressing modes;
                 DSPs; register allocation; stack offset assignment;
                 variable coalescing",
}
@Article{Whalley:2007:GE,
  author =       "David Whalley",
  title =        "Guest {Editorial}",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1216577",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Kumar:2007:ESI,
  author =       "Nagendra J. Kumar and Vasanth Asokan and Siddhartha
                 Shivshankar and Alexander G. Dean",
  title =        "Efficient software implementation of embedded
                 communication protocol controllers using asynchronous
                 software thread integration with time- and
                 space-efficient procedure calls",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210270",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The overhead of context switching limits efficient
                 scheduling of multiple concurrent threads on a
                 uniprocessor when real-time requirements exist. A
                 software-implemented protocol controller may be
                 crippled by this problem. The available idle time may
                 be too short to recover through context switching, so
                 only the primary thread can execute during message
                 activity, slowing the secondary threads and potentially
                 missing deadlines. Asynchronous software thread
                 integration (ASTI) uses coroutine calls and
                 integration, letting threads make independent progress
                 efficiently, and reducing the needed context switches.
                 We demonstrate the methods with a software
                 implementation of an automotive communication protocol
                 (J1850) and several secondary threads.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "asynchronous software thread integration; fine-grain
                 concurrency; hardware to software migration; J1850;
                 software-implemented communication protocol
                 controllers",
}
@Article{Zhuang:2007:PEP,
  author =       "Xiaotong Zhuang and Santosh Pande",
  title =        "Power-efficient prefetching for embedded processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210271",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Because of stringent power constraints, aggressive
                 latency-hiding approaches, such as prefetching, are
                 absent in the state-of-the-art embedded processors.
                 There are two main reasons that make prefetching power
                 inefficient. First, compiler-inserted prefetch
                 instructions increase code size and, therefore, could
                 increase I-cache power. Second, inaccurate prefetching
                 (especially for hardware prefetching) leads to high
                 D-cache power consumption because of useless accesses.
                 In this work, we show that it is possible to support
                 power-efficient prefetching through bit-differential
                 offset assignment. We target the prefetching of
                 relocatable stack variables with a high degree of
                 precision. By assigning the offsets of stack variables
                 in such a way that most consecutive addresses differ by
                 1 bit, we can prefetch them with compact prefetch
                 instructions to save I-cache power. The compiler first
                 generates an access graph of consecutive memory
                 references and then attempts a layout of the memory
                 locations in the smallest hypercube. Each dimension of
                 the hypercube represents a 1-bit differential
                 addressing. The embedding is carried out in as compact
                 a hypercube as possible in order to save memory space.
                 Each load/store instruction carries a hint regarding
                 prefetching the next memory reference by encoding its
                 differential address with respect to the current one.
                 To reduce D-cache power cost, we further attempt to
                 assign offsets so that most of the consecutive accesses
                 map to the same cache line. Our prefetching is done
                 using a one entry line buffer [Wilson et al. 1996].
                 Consequently, many look-ups in D-cache reduce to
                 incremental ones. This results in D-cache activity
                 reduction and power savings. Our prefetcher requires
                 both compiler and hardware support. In this paper, we
                 provide implementation on the processor model close to
                 ARM with small modification to the ISA. We tackle
                 issues such as out-of-order commit, predication, and
                 speculation through simple modifications to the
                 processor pipeline on noncritical paths. Our goal in
                 this work is to boost performance while
                 maintaining/lowering power consumption. Our results
                 show 12\% speedup and slight power reduction. The
                 runtime virtual space loss for stack and static data is
                 about 11.8\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "bit-differential addressing; data prefetching;
                 embedded processors; offset assignment",
}
@Article{Contreras:2007:XPP,
  author =       "Gilberto Contreras and Margaret Martonosi and Jinzhang
                 Peng and Guei-Yuan Lueh and Roy Ju",
  title =        "The {XTREM} power and performance simulator for the
                 {Intel XScale} core: {Design} and experiences",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210272",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Managing power concerns in microprocessors has become
                 a pressing research problem across the domains of
                 computer architecture, CAD, and compilers. As a result,
                 several parameterized cycle-level power simulators have
                 been introduced. While these simulators can be quite
                 useful for microarchitectural studies, their generality
                 limits how accurate they can be for any one chip
                 family. Furthermore, their hardware focus means that
                 they do not explicitly enable studying the interaction
                 of different software layers, such as Java applications
                 and their underlying runtime system software. This
                 paper describes and evaluates XTREM, a power-simulation
                 tool tailored for the Intel XScale microarchitecture.
                 In building XTREM, our goals were to develop a
                 microarchitecture simulator that, while still offering
                 size parameterizations for cache and other structures,
                 more accurately reflected a realistic processor
                 pipeline. We present a detailed set of validations
                 based on multimeter power measurements and hardware
                 performance counter sampling. XTREM exhibits an average
                 performance error of only 6.5\% and an even smaller
                 average power error: 4\%. The paper goes on to present
                 an application study enabled by the simulator. Namely,
                 we use XTREM to produce an energy consumption breakdown
                 for Java CDC and CLDC applications. Our simulator
                 measurements indicate that a large percentage of the
                 total energy consumption (up to 35\%) is devoted to the
                 virtual machine's support functions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Intel XScale technology; Java; power measurements;
                 power modeling",
}
@Article{DeSutter:2007:LTC,
  author =       "Bjorn {De Sutter} and Ludo {Van Put} and Dominique
                 Chanet and Bruno {De Bus} and Koen {De Bosschere}",
  title =        "Link-time compaction and optimization of {ARM}
                 executables",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210273",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The overhead in terms of code size, power consumption,
                 and execution time caused by the use of precompiled
                 libraries and separate compilation is often
                 unacceptable in the embedded world, where real-time
                 constraints, battery life-time, and production costs
                 are of critical importance. In this paper, we present
                 our link-time optimizer for the ARM architecture. We
                 discuss how we can deal with the peculiarities of the
                 ARM architecture related to its visible program counter
                 and how the introduced overhead can to a large extent
                 be eliminated. Our link-time optimizer is evaluated
                 with four tool chains, two proprietary ones from ARM
                 and two open ones based on GNU GCC. When used with
                 proprietary tool chains from ARM Ltd., our link-time
                 optimizer achieved average code size reductions of 16.0
                 and 18.5\%, while the programs have become 12.8 and
                 12.3\% faster, and 10.7 to 10.1\% more energy
                 efficient. Finally, we show how the incorporation of
                 link-time optimization in tool chains may influence
                 library interface design.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "compaction; linker; optimization; performance",
}
@Article{Panainte:2007:MCR,
  author =       "Elena Moscu Panainte and Koen Bertels and Stamatis
                 Vassiliadis",
  title =        "The {Molen} compiler for reconfigurable processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210274",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we describe the compiler developed to
                 target the Molen reconfigurable processor and
                 programming paradigm. The compiler automatically
                 generates optimized binary code for C applications,
                 based on pragma annotation of the code executed on the
                 reconfigurable hardware. For the IBM PowerPC 405
                 processor included in the Virtex II Pro platform FPGA,
                 we implemented code generation, register, and stack
                 frame allocation following the PowerPC EABI (embedded
                 application binary interface). The PowerPC backend has
                 been extended to generate the appropriate instructions
                 for the reconfigurable hardware and data transfer,
                 taking into account the information of the specific
                 hardware implementations and system. Starting with an
                 annotated C application, a complete design flow has
                 been integrated to generate the executable bitstream
                 for the reconfigurable processor. The flexible design
                 of the proposed infrastructure allows to consider the
                 special features of the reconfigurable architectures.
                 In order to hide the reconfiguration latencies, we
                 implemented an instruction-scheduling algorithm for the
                 dynamic hardware configuration instructions. The
                 algorithm schedules, in advance, the hardware
                 configuration instructions, taking into account the
                 conflicts for the reconfigurable hardware resources
                 (FPGA area) between the hardware operations. To verify
                 the Molen compiler, we used the multimedia video frame
                 M-JPEG encoder of which the extended discrete cosine
                 transform (DCT*) function was mapped on the FPGA. We
                 obtained an overall speedup of 2.5 (about 84\%
                 efficiency over the maximal theoretical speedup of
                 2.96). The performance efficiency is achieved using
                 automatically generated nonoptimized DCT* hardware
                 implementation. The instruction-scheduling algorithm
                 has been tested for DCT, quantization, and VLC
                 operations. Based on simulation results, we determine
                 that, while a simple scheduling produces a significant
                 performance decrease, our proposed scheduling
                 contributes for up to $ 16 \times $ M-JPEG encoder
                 speedup.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "FPGA; instruction scheduling; reconfigurable
                 computing",
}
@Article{Tan:2007:TAP,
  author =       "Yudong Tan and Vincent Mooney",
  title =        "Timing analysis for preemptive multitasking real-time
                 systems with caches",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210275",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we propose an approach to estimate the
                 worst-case response time (WCRT) of each task in a
                 preemptive multitasking single-processor real-time
                 system utilizing an L1 cache. The approach combines
                 intertask cache-eviction analysis and intratask
                 cache-access analysis to estimate the number of cache
                 lines that can possibly be evicted by the preempting
                 task and also be accessed again by the preempted task
                 after preemptions (thus requiring the preempted task to
                 reload the cache line(s)). This cache-reload delay
                 caused by preempting task(s) is then incorporated into
                 WCRT analysis. Three sets of applications with up to
                 six concurrent tasks running are used to test our
                 approach. The experimental results show that our
                 approach can tighten the WCRT estimate by up to 32\% ($
                 1.4 \times $) over prior state-of-the-art.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "real-time; worst-case response time",
}
@Article{Ratschan:2007:SVH,
  author =       "Stefan Ratschan and Zhikun She",
  title =        "Safety verification of hybrid systems by constraint
                 propagation-based abstraction refinement",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210276",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper deals with the problem of safety
                 verification of nonlinear hybrid systems. We start from
                 a classical method that uses interval arithmetic to
                 check whether trajectories can move over the boundaries
                 in a rectangular grid. We put this method into an
                 abstraction refinement framework and improve it by
                 developing an additional refinement step that employs
                 interval-constraint propagation to add information to
                 the abstraction without introducing new grid elements.
                 Moreover, the resulting method allows switching
                 conditions, initial states, and unsafe states to be
                 described by complex constraints, instead of sets that
                 correspond to grid elements. Nevertheless, the method
                 can be easily implemented, since it is based on a
                 well-defined set of constraints, on which one can run
                 any constraint propagation-based solver. Tests of such
                 an implementation are promising.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "constraint propagation; hybrid systems; intervals",
}
@Article{Schepers:2007:GEI,
  author =       "Henk Schepers",
  title =        "Guest editorial: {Introduction} to the special issue
                 on software and compilers for embedded systems",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1234675.1234676",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2007:SCT,
  author =       "Sheayun Lee and Jaejin Lee and Chang Yun Park and Sang
                 Lyul Min",
  title =        "Selective code transformation for dual instruction set
                 processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "10:1--10:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1234675.1234677",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems are often constrained in terms of
                 both code size and execution time, because of a limited
                 amount of available memory and real-time nature of
                 applications. A dual instruction set processor, which
                 supports a reduced instruction set (16
                 bits/instruction), in addition to a full instruction
                 set (32 bits/instruction), allows an opportunity for a
                 tradeoff between these two design criteria.
                 Specifically, while the reduced instruction set can be
                 used to reduce code size by providing smaller
                 instructions, a program compiled into the reduced
                 instruction set typically runs slower than the same
                 program compiled into the full instruction set.
                 Motivated by this observation, we propose a code
                 generation technique that exploits this tradeoff
                 relationship by selectively using the two instruction
                 sets for different sections in the program. The
                 proposed technique, called selective code
                 transformation, not only provides a mechanism to enable
                 a flexible tradeoff between a program's code size and
                 its execution time, but also facilitates program
                 optimization toward enhancing its worst case
                 performance. The results from our experiments show that
                 our proposed technique can be effectively used to
                 fine-tune an application program on a spectrum of code
                 size and execution performance, which, in turn, enables
                 a system-wide optimization on memory space and
                 execution speed involving multiple applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "dual instruction set processors; mixed-width
                 instruction set architecture; reduced bit-width
                 instruction set architecture",
}
@Article{Zhang:2007:RBP,
  author =       "Wei Zhang and Bramha Allu",
  title =        "Reducing branch predictor leakage energy by exploiting
                 loops",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "11:1--11:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1234675.1234678",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the scaling of technology, leakage energy will
                 become the dominant source of energy consumption.
                 Besides cache memories, branch predictors are among the
                 largest on-chip array structures and consume nontrivial
                 leakage energy. This paper proposes two cost-effective
                 loop-based strategies to reduce the branch predictor
                 leakage without impacting prediction accuracy or
                 performance. The loop-based approaches exploit the fact
                 that loops usually only contain a small number of
                 instructions and, hence, even fewer branch instructions
                 while taking a significant fraction of the execution
                 time. Consequently, all the nonactive entries of branch
                 predictors can be placed into the low leakage mode
                 during the loop execution in order to reduce leakage
                 energy. Compiler and circuit supports are discussed to
                 implement the proposed leakage-reduction strategies.
                 Compared to the recently proposed decay-based approach,
                 our experimental results show that the loop-based
                 approach can extract 16.2\% more dead time of the
                 branch predictor, on average, leading to more leakage
                 energy savings without impacting the branch prediction
                 accuracy and performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "branch prediction; compiler; leakage energy",
}
@Article{Scharwaechter:2007:AAE,
author = "Hanno Scharwaechter and David Kammler and Andreas
Wieferink and Manuel Hohenauer and Kingshuk Karuri and
Jianjiang Ceng and Rainer Leupers and Gerd Ascheid and
Heinrich Meyr",
title = "{ASIP} architecture exploration for efficient {IPSec}
encryption: a case study",
journal = j-TECS,
volume = "6",
number = "2",
pages = "12:1--12:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1234675.1234679",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:17 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Application-Specific Instruction-Set Processors
(ASIPs) are becoming increasingly popular in the world
of customized, application-driven System-on-Chip (SoC)
designs. Efficient ASIP design requires an iterative
architecture exploration loop---gradual refinement of
the processor architecture starting from an initial
template. To accomplish this task, design automation
tools are used to detect bottlenecks in embedded
applications, to implement application-specific
processor instructions, and to automatically generate
the required software tools (such as instruction-set
simulator, C-compiler, assembler, and profiler), as
well as to synthesize the hardware. This paper
describes an architecture exploration loop for an ASIP
coprocessor that implements common encryption
functionality used in symmetric block cipher algorithms
for internet protocol security (IPSec). The coprocessor
is accessed via shared memory and, as a consequence,
our approach is easily adaptable to arbitrary main
processor architectures. This paper presents the
extended version of our case study that has been
already published at the SCOPES conference in 2004. In
both papers, a MIPS architecture is used as the main
processor and Blowfish as encryption algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "ADL; ASIP; computer-aided design; IPSec",
}
@Article{Turjan:2007:CIC,
author = "Alexandru Turjan and Bart Kienhuis and Ed Deprettere",
title = "Classifying interprocess communication in process
network representation of nested-loop programs",
journal = j-TECS,
volume = "6",
number = "2",
pages = "13:1--13:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1234675.1234680",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:17 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "New embedded signal-processing architectures are
emerging that are composed of loosely coupled
heterogeneous components like CPUs or DSPs, specialized
IP cores, reconfigurable units, or memories. We believe
that these architectures should be programmed using the
process network model of computation. To ease the
mapping of applications, we are developing the Compaan
compiler that automatically derives a process network
(PN) description from an application written in Matlab
or C. In this paper, we investigate a particular
problem in nested loop programs, which is about
classifying the interprocess communication in the PN
representation of the nested loop program. The global
memory arrays present in the code have to be replaced
by a distributed communication structure used for
communicating data between the network processes. We
show that four types of communication exist, each
exhibiting different requirements when realizing them
in hardware or software. We first present two compile
time tests that are based on integer linear programming
to decide the type of the communication. In the second
part of this paper, we present alternative
classification techniques that have polynomial
complexity. However, in some cases, those techniques do
not give a definitive answer and the ILP tests have to
be applied. All present tests are combined in a hybrid
classification scheme that correctly classifies the
interprocess communication. In only 5\% of the cases to
classify, we have to rely on integer linear programming
while, in the remaining 95\%, the alternative
techniques presented in this paper are able to
correctly classify each case. The hybrid classification
scheme has become an important part of our Compaan
compiler.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "hybrid classification approach; integer linear
programming; matrix manipulations; static analysis",
}
@Article{Ko:2007:BSA,
author = "Ming-Yung Ko and Praveen K. Murthy and Shuvra S.
Bhattacharyya",
title = "Beyond single-appearance schedules: {Efficient DSP}
software synthesis using nested procedure calls",
journal = j-TECS,
volume = "6",
number = "2",
pages = "14:1--14:??",
month = may,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1234675.1234681",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:17 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Synthesis of digital signal-processing (DSP) software
from dataflow-based formal models is an effective
approach for tackling the complexity of modern DSP
applications. In this paper, an efficient method is
proposed for applying subroutine call instantiation of
module functionality when synthesizing embedded
software from a dataflow specification. The technique
is based on a novel recursive decomposition of
subgraphs in a cluster hierarchy that is optimized for
low buffer size. Applying this technique, one can
achieve significantly lower buffer sizes than what is
available for minimum code size inlined schedules,
which have been the emphasis of prior work on software
synthesis. Furthermore, it is guaranteed that the
number of procedure calls in the synthesized program is
polynomially bounded in the size of the input dataflow
graph, even though the number of module invocations may
increase exponentially. This recursive decomposition
approach provides an efficient means for integrating
subroutine-based module instantiation into the design
space of DSP software synthesis. The experimental
results demonstrate a significant improvement in buffer
cost, especially for more irregular multirate DSP
applications, with moderate code and execution time
overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "block diagram compiler; design methodology; embedded
systems; hierarchical graph decomposition; memory
optimization; procedural implementation; synchronous
dataflow",
}
@Article{Hua:2007:PDM,
author = "Shaoxiong Hua and Gang Qu and Shuvra S.
Bhattacharyya",
title = "Probabilistic design of multimedia embedded systems",
journal = j-TECS,
volume = "6",
number = "3",
pages = "15:1--15:??",
month = jul,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1275986.1275987",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:49:41 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this paper, we propose the novel concept of
probabilistic design for multimedia embedded systems,
which is motivated by the challenge of how to design,
but not overdesign, such systems while systematically
incorporating performance requirements of multimedia
application, uncertainties in execution time, and
tolerance for reasonable execution failures. Unlike
most present techniques that are based on either worst-
or average-case execution times of application tasks,
where the former guarantees the completion of each
execution, but often leads to overdesigned systems, and
the latter fails to provide any completion guarantees,
the proposed probabilistic design method takes
advantage of unique features mentioned above of
multimedia systems to relax the rigid hardware
requirements for software implementation and avoid
overdesigning the system. In essence, this relaxation
expands the design space and we further develop an
off-line on-line minimum effort algorithm for quick
exploration of the enlarged design space at early
design stages. This is the first step toward our goal
of bridging the gap between real-time analysis and
embedded software implementation for rapid and economic
multimedia system design. It is our belief that the
proposed method has great potential in reducing system
resource while meeting performance requirements. The
experimental results confirm this as we achieve
significant saving in system's energy consumption to
provide a statistical completion ratio guarantee (i.e.,
the expected number of completions over a large number
of iterations is greater than a given value).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "completion ratio; energy minimization;
hardware/software codesign; multiple voltage;
probabilistic design; soft real-time system",
}
@Article{Koushanfar:2007:TMC,
author = "Farinaz Koushanfar and Abhijit Davare and David T.
Nguyen and Alberto Sangiovanni-Vincentelli and Miodrag
Potkonjak",
title = "Techniques for maintaining connectivity in wireless
ad-hoc networks under energy constraints",
journal = j-TECS,
volume = "6",
number = "3",
pages = "16:1--16:??",
month = jul,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1275986.1275988",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:49:41 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Distributed wireless systems (DWSs) are emerging as
the enabler for next-generation wireless applications.
There is a consensus that DWS-based applications, such
as pervasive computing, sensor networks, wireless
information networks, and speech and data communication
networks, will form the backbone of the next
technological revolution. Simultaneously, with great
economic, industrial, consumer, and scientific
potential, DWSs pose numerous technical challenges.
Among them, two are widely considered as crucial:
autonomous localized operation and minimization of
energy consumption. We address the fundamental problem
of how to maximize the lifetime of the network using
only local information, while preserving network
connectivity. We start by introducing the care-free
sleep (CS) Theorem that provides provably optimal
conditions for a node to go into sleep mode while
ensuring that global connectivity is not affected. The
CS theorem is the basis for an efficient localized
algorithm that decides which nodes will go into
sleep mode and for how long. We have also developed
mechanisms for collecting neighborhood information and
for the coordination of distributed energy minimization
protocols. The effectiveness of the approach is
demonstrated using a comprehensive study of the
performance of the algorithm over a wide range of
network parameters. Another important highlight is the
first mathematical and Monte Carlo analysis that
establishes the importance of considering nodes within
a small number of hops in order to preserve energy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "ad-hoc networks; connectivity; energy management; low
power; power management; sleeping coordination",
}
@Article{Wagner:2007:HSI,
author = "Fl{\'a}vio R. Wagner and Wander Ces{\'a}rio and Ahmed
A. Jerraya",
title = "Hardware\slash software {IP} integration using the
{ROSES} design environment",
journal = j-TECS,
volume = "6",
number = "3",
pages = "17:1--17:??",
month = jul,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1275986.1275989",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:49:41 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Considering current time-to-market pressures, IP reuse
is mandatory for the design of complex embedded
systems-on-chip (SoC). The integration of IP components
into a given design is the most complex task in the
whole reuse process. This paper describes the IP
integration approach implemented in the ROSES design
environment, which presents a unique combination of
features that enhance IP reuse: automatic assembly of
interfaces between heterogeneous software and hardware
IP components; easy adaptation to different on-chip
communication structures and bus and core standards;
generation of customized and minimal OSs for
programmable components; and an
architecture-independent high-level API embedded into
SystemC that makes application software independent
from system implementation. Application code is written
by using communication functions available in this API.
ROSES automatically assembles wrappers that implement
these functions, such that the application code does
not need to be modified in order to run in the final
synthesized system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "IP integration; systems-on-chip",
}
@Article{Lee:2007:LBB,
author = "Sang-Won Lee and Dong-Joo Park and Tae-Sun Chung and
Dong-Ho Lee and Sangwon Park and Ha-Joo Song",
title = "A log buffer-based flash translation layer using
fully-associative sector translation",
journal = j-TECS,
volume = "6",
number = "3",
pages = "18:1--18:??",
month = jul,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1275986.1275990",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:49:41 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Flash memory is being rapidly deployed as data storage
for mobile devices such as PDAs, MP3 players, mobile
phones, and digital cameras, mainly because of its low
electronic power, nonvolatile storage, high
performance, physical stability, and portability. One
disadvantage of flash memory is that prewritten data
cannot be dynamically overwritten. Before overwriting
prewritten data, a time-consuming erase operation on
the used blocks must precede, which significantly
degrades the overall write performance of flash memory.
In order to solve this ``erase-before-write'' problem,
the flash memory controller can be integrated with a
software module, called ``flash translation layer
(FTL).'' Among many FTL schemes available, the log
block buffer scheme is considered to be optimum. With
this scheme, a small number of log blocks, a kind of
write buffer, can improve the performance of write
operations by reducing the number of erase operations.
However, this scheme can suffer from low space
utilization of log blocks. In this paper, we show that
there is much room for performance improvement in the
log buffer block scheme, and propose an enhanced log
block buffer scheme, called FAST (fully associative
sector translation). Our FAST scheme improves the space
utilization of log blocks using fully-associative
sector translations for the log block sectors. We also
show empirically that our FAST scheme outperforms the
pure log block buffer scheme.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "address translation; associative mapping; flash
memory; FTL; log blocks",
}
@Article{Wu:2007:EBT,
author = "Chin-Hsien Wu and Tei-Wei Kuo and Li Ping Chang",
title = "An efficient {B-tree} layer implementation for
flash-memory storage systems",
journal = j-TECS,
volume = "6",
number = "3",
pages = "19:1--19:??",
month = jul,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1275986.1275991",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:49:41 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With the significant growth of the markets for
consumer electronics and various embedded systems,
flash memory is now an economic solution for storage
systems design. Because index structures require
intensively fine-grained updates/modifications,
block-oriented access over flash memory could introduce
a significant number of redundant writes. This might
not only severely degrade the overall performance, but
also damage the reliability of flash memory. In this
paper, we propose a very different approach, which can
efficiently handle fine-grained updates/modifications
caused by B-tree index access over flash memory. The
implementation is done directly over the flash
translation layer (FTL); hence, no modifications to
existing application systems are needed. We demonstrate
that when index structures are adopted over flash
memory, the proposed methodology can significantly
improve the system performance and, at the same time,
reduce both the overhead of flash-memory management and
the energy dissipation. The average response time of
record insertions and deletions was also significantly
reduced.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "B-tree; database systems; embedded systems; flash
memory; storage systems",
}
@Article{Xie:2007:ISP,
author = "Tao Xie and Xiao Qin",
title = "Improving security for periodic tasks in embedded
systems through scheduling",
journal = j-TECS,
volume = "6",
number = "3",
pages = "20:1--20:??",
month = jul,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1275986.1275992",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:49:41 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "While many scheduling algorithms for periodic tasks
ignore security requirements posed by sensitive
applications and are, consequently, unable to perform
properly in embedded systems with security constraints,
in this paper, we present an approach to scheduling
periodic tasks in embedded systems subject to security
and timing constraints. We design a necessary and
sufficient feasibility check for a set of periodic
tasks with security requirements. With the feasibility
test in place, we propose a scheduling algorithm, or
SASES (security-aware scheduling for embedded systems),
which accounts for both security and timing
requirements. SASES judiciously distributes slack times
among a variety of security services for a set of
periodic tasks, thereby optimizing security for
embedded systems without sacrificing schedulability. To
demonstrate the effectiveness of SASES, we apply the
proposed SASES to real-world embedded systems such as
an automated flight control system. We show, through
extensive simulations, that SASES is able to maximize
security for embedded systems while guaranteeing
timeliness. In particular, SASES significantly improves
security over three baseline algorithms by up to
107\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "embedded systems; periodic tasks; real-time systems;
scheduling; security-sensitive applications",
}
@Article{Gupta:2007:ISL,
author = "Rajiv Gupta and Yunheung Paek",
title = "Introduction to the special {LCTES'05} issue",
journal = j-TECS,
volume = "6",
number = "4",
pages = "21:1--21:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1274858.1274859",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:30 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gay:2007:SDP,
author = "David Gay and Philip Levis and David Culler",
title = "Software design patterns for {TinyOS}",
journal = j-TECS,
volume = "6",
number = "4",
pages = "22:1--22:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1274858.1274860",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:30 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present design patterns used by software components
in the TinyOS sensor network operating system. They
differ significantly from traditional software design
patterns because of the constraints of sensor networks
and of TinyOS's focus on static allocation and
whole-program composition. We describe how nesC has
evolved to support these design patterns by including a
few simple language primitives and optimizations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "design patterns; embedded systems; nesC; TinyOS",
}
@Article{Chanet:2007:ARM,
author = "Dominique Chanet and Bjorn {De Sutter} and Bruno {De
Bus} and Ludo {Van Put} and Koen {De Bosschere}",
title = "Automated reduction of the memory footprint of the
{Linux} kernel",
journal = j-TECS,
volume = "6",
number = "4",
pages = "23:1--23:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1274858.1274861",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:30 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The limited built-in configurability of Linux can lead
to expensive code size overhead when it is used in the
embedded market. To overcome this problem, we propose
the application of link-time compaction and
specialization techniques that exploit the a priori
known, fixed runtime environment of many embedded
systems. In experimental setups based on the ARM XScale
and i386 platforms, the proposed techniques are able to
reduce the kernel memory footprint with over 16\%. We
also show how relatively simple additions to existing
binary rewriters can implement the proposed techniques
for a complex, very unconventional program, such as the
Linux kernel. We note that even after specialization, a
lot of seemingly unnecessary code remains in the kernel
and propose to reduce the footprint of this code by
applying code-compression techniques. This technique,
combined with the previous ones, reduces the memory
footprint with over 23\% for the i386 platform and 28\%
for the ARM platform. Finally, we pinpoint an important
code size growth problem when compaction and
compression techniques are combined on the ARM
platform.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "compaction; compression; Linux kernel; operating
system; specialization; system calls",
}
@Article{Sassone:2007:SSS,
author = "Peter G. Sassone and D. Scott Wills and Gabriel H.
Loh",
title = "Static strands: {Safely} exposing dependence chains
for increasing embedded power efficiency",
journal = j-TECS,
volume = "6",
number = "4",
pages = "24:1--24:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1274858.1274862",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:30 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modern embedded processors are designed to maximize
execution efficiency---the amount of performance
achieved per unit of energy dissipated while meeting
minimum performance levels. To increase this
efficiency, we propose utilizing static strands,
dependence chains without fan-out, which are exposed by
a compiler pass. These dependent instructions are
resequenced to be sequential and annotated to
communicate their location to the hardware.
Importantly, this modified application is binary
compatible and functionally identical to the original,
allowing transparent execution on a baseline processor.
However, these static strands can be easily collapsed
and optimized by simple processor modifications,
significantly reducing the workload energy. Results
show that over 30\% of MediaBench and Spec2000int
dynamic instructions can be collapsed, reducing issue
logic energy by 20\%, bypass energy 19\%, and register
file energy 14\%. In addition, by increasing the
effective capacity of pipeline resources by almost a
third, average IPC can be improved up to 15\%. This
performance gain can then be traded in for a lower
clock frequency to maintain a baseline level of
performance, further reducing energy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "architecture; dependency collapsing; energy;
sequentiality",
}
@Article{Staschulat:2007:SPC,
author = "Jan Staschulat and Rolf Ernst",
title = "Scalable precision cache analysis for real-time
software",
journal = j-TECS,
volume = "6",
number = "4",
pages = "25:1--25:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1274858.1274863",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:30 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Caches are needed to increase the processor
performance, but the temporal behavior is difficult to
predict, especially in embedded systems with preemptive
scheduling. Current approaches use simplified
assumptions or propose complex analysis algorithms to
bound the cache-related preemption delay. In this
paper, a scalable preemption delay analysis for
associative instruction caches to control the analysis
precision and the time-complexity is proposed. An
accurate preemption delay calculation is integrated
into a cache-aware schedulability analysis. The
framework is evaluated in several experiments.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "cache; embedded systems; preemptive scheduling;
worst-case execution time analysis",
}
@Article{Varma:2007:AFS,
author = "Ankush Varma and Bruce Jacob and Eric Debes and Igor
Kozintsev and Paul Klein",
title = "Accurate and fast system-level power modeling: an
{XScale}-based case study",
journal = j-TECS,
volume = "6",
number = "4",
pages = "26:1--26:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1274858.1274864",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:30 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Accurate and fast system modeling is central to the
rapid design space exploration needed for
embedded-system design. With fast, complex SoCs playing
a central role in such systems, system designers have
come to require MIPS-range simulation speeds and
near-cycle accuracy. The sophisticated simulation
frameworks that have been developed for high-speed
system performance modeling do not address power
consumption, although it is a key design constraint. In
this paper, we define a simulation-based methodology
for extending system performance-modeling frameworks to
also include power modeling. We demonstrate the use of
this methodology with a case study of a real, complex
embedded system, comprising the Intel XScale{\reg}
embedded microprocessor, its WMMX{\trademark} SIMD
coprocessor, L1 caches, SDRAM and the on-board address
and data buses. We describe detailed power models for
each of these components and validate them against
physical measurements from hardware, demonstrating that
such frameworks enable designers to model both power
and performance at high speeds without sacrificing
accuracy. Our results indicate that the power estimates
obtained are accurate within 5\% of physical
measurements from hardware, while simulation speeds
consistently exceed a million instructions per second
(MIPS).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "embedded systems; power modeling; SystemC",
}
@Article{Carta:2007:CTA,
author = "Salvatore Carta and Andrea Alimonda and Alessandro
Pisano and Andrea Acquaviva and Luca Benini",
title = "A control theoretic approach to energy-efficient
pipelined computation in {MPSoCs}",
journal = j-TECS,
volume = "6",
number = "4",
pages = "27:1--27:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1274858.1274865",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:30 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this work, we describe a control theoretic approach
to dynamic voltage/frequency scaling (DVFS) in a
pipelined MPSoC architecture with soft real-time
constraints, aimed at minimizing energy consumption
with throughput guarantees. Theoretical analysis and
experiments carried out on a cycle-accurate,
energy-aware, and multiprocessor simulation platform
are provided. We give a dynamic model of the system
behavior which allows to synthesize linear and
nonlinear feedback control schemes for the run-time
adjustment of the core frequencies. We study the
characteristics of the proposed techniques in both
transient and steady-state conditions. Finally, we
compare the proposed feedback approaches and local DVFS
policies from an energy consumption viewpoint.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "DVFS; feedback-control techniques; MPSoC; parallel
systems",
}
@Article{Crenshaw:2007:RIE,
author = "Tanya L. Crenshaw and Spencer Hoke and Ajay Tirumala
and Marco Caccamo",
title = "Robust implicit {EDF}: a wireless {MAC} protocol for
collaborative real-time systems",
journal = j-TECS,
volume = "6",
number = "4",
pages = "28:1--28:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1274858.1274866",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:30 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Advances in wireless technology have brought us closer
to extensive deployment of distributed real-time
embedded systems connected through a wireless channel.
The medium-access control (MAC) layer protocol is
critical in providing a real-time guarantee. We have
devised a real-time wireless MAC protocol, robust
implicit earliest deadline first, or RI-EDF. Packets
are transmitted according to EDF scheduling rules,
offering a protocol that implicitly avoids contention.
In the event of a packet loss or a node failure, every
node has the opportunity to recover the schedule based
on a static recovery priority, offering a protocol that
is robust with no central point of failure. We
demonstrate in simulations that RI-EDF provides better
goodput and lower packet loss than existing protocols
like 802.11 PCF and EDCF. In our implementation and
distributed control test-bed, we show that RI-EDF
provides better throughput than the TinyOS MAC-layer
protocol. Overall, RI-EDF provides predictable temporal
behavior with minimal impact on node failures, packet
losses, and noise in the channel.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "earliest deadline first; medium-access control; real
time; wireless",
}
@Article{Quan:2007:EED,
  author = {Gang Quan and Xiaobo Sharon Hu},
  title = {Energy efficient {DVS} schedule for fixed-priority
    real-time systems},
  journal = j-TECS,
  volume = {6},
  number = {4},
  pages = {29:1--29:??},
  month = sep,
  year = {2007},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1274858.1274867},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Energy consumption has become an increasingly
    important consideration in designing many real-time
    embedded systems. Variable voltage processors, if used
    properly, can dramatically reduce such system energy
    consumption. In this paper, we present a technique to
    determine voltage settings for a variable voltage
    processor that utilizes a fixed-priority assignment to
    schedule jobs. By exploiting more efficiently the
    processor slack time, our approach can be more
    effective in reducing the execution speed for real-time
    tasks when necessary. Our approach also produces the
    minimum constant voltage needed to feasibly schedule
    the entire job set. With both randomly generated and
    practical examples, our heuristic approach can achieve
    the dynamic energy reduction very close to the
    theoretically optimal one (within 2\%) with much less
    computation cost.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {29},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {dynamic voltage scaling; fixed-priority scheduling;
    low power; real time},
}
@Article{Rao:2007:EOS,
  author = {Ravishankar Rao and Sarma Vrudhula},
  title = {Energy optimal speed control of a producer--consumer
    device pair},
  journal = j-TECS,
  volume = {6},
  number = {4},
  pages = {30:1--30:??},
  month = sep,
  year = {2007},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1274858.1274868},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We propose a modular approach for minimizing the total
    energy consumed by a pair of generic communicating
    devices (producer--consumer scenario) by jointly
    controlling their speed profiles. Each device (like a
    CPU, or disk drive) is assumed to have a controllable
    variable called its speed (e.g., a CPU's clock
    frequency, a disk drive's spindle motor speed) that
    affects its power consumption and performance (e.g.,
    throughput, data transfer rate). The device and task
    models we analyzed were inspired by applications like
    CD recording (hard drive to CD drive data transfer) and
    data processing (disk drive to CPU data transfer). The
    proposed solution can be used for any pair of devices
    with convex (for continuous speed sets) or W-convex (a
    discrete version of a convex function for discrete
    speed sets) power--speed relationships. For discrete
    speed sets, the method operates directly on the
    power--speed values and does not require an analytical
    relationship between power and speed. The key to
    solving the two-device optimization problem was the
    observation that it could be split into two single
    device parametric optimization problems, where the
    parameters correspond to the common task that both the
    devices must execute. The following divide-and-conquer
    approach is proposed: [divide] the optimal speed policy
    and energy consumption of each device is derived as an
    analytical function of its task parameters; [conquer]
    the optimal values of these parameters are found by
    minimizing the sum of the parameterized energy
    functions and plugged back into the parameterized speed
    profiles. The main advantage of this approach is that
    each device can be characterized independently and this
    allows system designers to mix and match
    manufacturer-supplied device energy curves to evaluate
    and optimize different application scenarios. We
    demonstrate our approach using three device
    characterization examples (for a CD drive, hard drive,
    and a CPU) and two application scenarios (CD recording,
    MD5 checksum computation).},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {30},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {disk drive; energy optimization; joint optimization;
    processor; speed control},
}
@Article{Loghi:2007:PMM,
  author = {Mirko Loghi and Luca Benini and Massimo Poncino},
  title = {Power macromodeling of {MPSoC} message passing
    primitives},
  journal = j-TECS,
  volume = {6},
  number = {4},
  pages = {31:1--31:??},
  month = sep,
  year = {2007},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1274858.1274869},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Estimating the energy consumption of software in
    multiprocessor systems-on-chip (MPSoCs) is crucial for
    enabling quick evaluations of both software and
    hardware optimizations. However, high-level estimations
    should be applicable at software level, possibly
    constructing effective power models depending on
    parameters that can be extracted directly from the
    application characteristics. We propose a methodology
    for accurate analysis of power consumption of
    message-passing primitives in a MPSoC, and, in
    particular, an energy model which, in spite of its
    simplicity, allows to model the traffic-dependent
    nature of energy consumption through the use of a
    single, abstract parameter, namely, the size of the
    message exchanged.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {31},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {communication primitives; macromodeling;
    multiprocessor; system-on-chip},
}
@Article{Kansal:2007:PME,
  author = {Aman Kansal and Jason Hsu and Sadaf Zahedi and Mani B.
    Srivastava},
  title = {Power management in energy harvesting sensor
    networks},
  journal = j-TECS,
  volume = {6},
  number = {4},
  pages = {32:1--32:??},
  month = sep,
  year = {2007},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1274858.1274870},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Power management is an important concern in sensor
    networks, because a tethered energy infrastructure is
    usually not available and an obvious concern is to use
    the available battery energy efficiently. However, in
    some of the sensor networking applications, an
    additional facility is available to ameliorate the
    energy problem: harvesting energy from the environment.
    Certain considerations in using an energy harvesting
    source are fundamentally different from that in using a
    battery, because, rather than a limit on the maximum
    energy, it has a limit on the maximum rate at which the
    energy can be used. Further, the harvested energy
    availability typically varies with time in a
    nondeterministic manner. While a deterministic metric,
    such as residual battery, suffices to characterize the
    energy availability in the case of batteries, a more
    sophisticated characterization may be required for a
    harvesting source. Another issue that becomes important
    in networked systems with multiple harvesting nodes is
    that different nodes may have different harvesting
    opportunity. In a distributed application, the same
    end-user performance may be achieved using different
    workload allocations, and resultant energy consumptions
    at multiple nodes. In this case, it is important to
    align the workload allocation with the energy
    availability at the harvesting nodes. We consider the
    above issues in power management for energy-harvesting
    sensor networks. We develop abstractions to
    characterize the complex time varying nature of such
    sources with analytically tractable models and use them
    to address key design issues. We also develop
    distributed methods to efficiently use harvested energy
    and test these both in simulation and experimentally on
    an energy-harvesting sensor network, prototyped for
    this work.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {32},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {adaptive duty cycling; energy neutrality; Heliomote;
    lifetime; power management},
}
@Article{Bueno:2007:RRP,
author = "David Bueno and Chris Conger and Alan D. George and
Ian Troxel and Adam Leko",
title = "{RapidIO} for radar processing in advanced space
systems",
journal = j-TECS,
volume = "7",
number = "1",
pages = "1:1--1:38",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324969.1324970",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:48 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Space-based radar is a suite of applications that
presents many unique system design challenges. In this
paper, we investigate use of RapidIO, a new
high-performance embedded systems interconnect, in
addressing issues associated with the high network
bandwidth requirements of real-time ground moving
target indicator (GMTI), and synthetic aperture radar
(SAR) applications in satellite systems. Using
validated simulation, we study several critical issues
related to the RapidIO network and algorithms under
study. The results show that RapidIO is a promising
platform for space-based radar using emerging
technology, providing network bandwidth to enable
parallel computation previously unattainable in an
embedded satellite system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "ground-moving target indicator; RapidIO; space-based
radar; synthetic aperture radar",
}
@Article{Fei:2007:EOS,
author = "Yunsi Fei and Srivaths Ravi and Anand Raghunathan and
Niraj K. Jha",
title = "Energy-optimizing source code transformations for
operating system-driven embedded software",
journal = j-TECS,
volume = "7",
number = "1",
pages = "2:1--2:26",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324969.1324971",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:48 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This paper proposes four types of source code
transformations for operating system (OS)-driven
embedded software programs to reduce their energy
consumption. Their key features include spanning of
process boundaries and minimization of the energy
consumed in the execution of OS
services---opportunities which are beyond the reach of
conventional compiler optimizations and source code
transformations. We have applied the proposed
transformations to several multiprocess benchmark
programs in the context of an embedded Linux OS running
on an Intel StrongARM processor. They achieve up to
37.9\% (23.8\%, on average) energy reduction compared
to highly compiler-optimized implementations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "energy consumption; Linux; source code
transformations",
}
@Article{Zhu:2007:ESA,
author = "Yifan Zhu and Frank Mueller",
title = "Exploiting synchronous and asynchronous {DVS} for
feedback {EDF} scheduling on an embedded platform",
journal = j-TECS,
volume = "7",
number = "1",
pages = "3:1--3:26",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324969.1324972",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:48 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Contemporary processors support dynamic voltage
scaling (DVS) to reduce power consumption by varying
processor voltage/frequency dynamically. We develop
power-aware feedback--DVS algorithms for hard real-time
systems that adapt to dynamically changing workloads.
The algorithms lower execution speed while guaranteeing
timing constraints. We study energy consumption for
synchronous and asynchronous DVS switching on a PowerPC
board. Energy, measured via data acquisition, is
reduced up to 70\% over na{\"\i}ve DVS for our feedback
scheme with 24\% peak savings over previous algorithms.
These results, albeit differing in quantity, confirm
trends observed under simulation. They are the first of
their kind on an embedded board.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "dynamic voltage scaling; feedback control; real-time
systems; scheduling",
}
@Article{Vera:2007:DCL,
author = "Xavier Vera and Bj{\"o}rn Lisper and Jingling Xue",
title = "Data cache locking for tight timing calculations",
journal = j-TECS,
volume = "7",
number = "1",
pages = "4:1--4:38",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324969.1324973",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:48 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Caches have become increasingly important with the
widening gap between main memory and processor speeds.
Small and fast cache memories are designed to bridge
this discrepancy. However, they are only effective when
programs exhibit sufficient data locality. In addition,
caches are a source of unpredictability, resulting in
programs sometimes behaving in a different way than
expected. Detailed information about the number of
cache misses and their causes allows us to predict
cache behavior and to detect bottlenecks. Small
modifications in the source code may change memory
patterns, thereby altering the cache behavior. Code
transformations, which take the cache behavior into
account, might result in a high cache performance
improvement. However, cache memory behavior is very
hard to predict, thus making the task of optimizing and
timing cache behavior very difficult. This article
proposes and evaluates a new compiler framework that
times cache behavior for multitasking systems. Our
method explores the use of cache partitioning and
dynamic cache locking to provide worst-case performance
estimates in a safe and tight way for multitasking
systems. We use cache partitioning, which divides the
cache among tasks to eliminate intertask cache
interferences. We combine static cache analysis and
cache-locking mechanisms to ensure that all intratask
conflicts, and consequently, memory access times, are
exactly predictable. The results of our experiments
demonstrate the capability of our framework to describe
cache behavior at compile time. We compare our timing
approach with a system equipped with a nonpartitioned,
but statically, locked data cache. Our method
outperforms static cache locking for all analyzed task
sets under various cache architectures, demonstrating
that our fully predictable scheme does not compromise
the performance of the transformed programs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "data cache analysis; embedded systems; safety critical
systems; worst-case execution time",
}
@Article{Armbruster:2007:RTJ,
author = "Austin Armbruster and Jason Baker and Antonio Cunei
and Chapman Flack and David Holmes and Filip Pizlo and
Edward Pla and Marek Prochazka and Jan Vitek",
title = "A real-time {Java} virtual machine with applications
in avionics",
journal = j-TECS,
volume = "7",
number = "1",
pages = "5:1--5:49",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324969.1324974",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:48 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This paper reports on our experience with the
implementation of the Real-time Specification for Java
on the Ovm open source Java virtual machine. We
describe the architecture and main design decisions
involved in implementing real-time Java on Ovm. We
present the first use of Real-time Java in avionics in
the context of control software for a ScanEagle
Unmanned Aerial Vehicle.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "avionics; memory management; Real-Time Java; virtual
machines",
}
@Article{Mangeruca:2007:USU,
author = "Leonardo Mangeruca and Massimo Baleani and Alberto
Ferrari and Alberto Sangiovanni-Vincentelli",
title = "Uniprocessor scheduling under precedence constraints
for embedded systems design",
journal = j-TECS,
volume = "7",
number = "1",
pages = "6:1--6:30",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324969.1324975",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:48 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this paper, we present a novel approach to the
constrained scheduling problem, while addressing a more
general class of constraints that arise from the timing
requirements on real-time embedded controllers. We
provide general necessary and sufficient conditions for
scheduling under precedence constraints and derive
sufficient conditions for two well-known scheduling
policies. We define mathematical problems that provide
optimum priority and deadline assignments, while
ensuring both precedence constraints and system's
schedulability. We show how these problems can be
relaxed to corresponding integer linear programming
(ILP) formulations leveraging on available solvers. The
results are demonstrated on a real design case.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "design of embedded systems; embedded software;
precedence constraints; real-time scheduling",
}
@Article{Bordoloi:2007:ISA,
author = "Unmesh D. Bordoloi and Samarjit Chakraborty",
title = "Interactive schedulability analysis",
journal = j-TECS,
volume = "7",
number = "1",
pages = "7:1--7:27",
month = dec,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1145/1324969.1324976",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:21:48 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A typical design process for real-time embedded
systems involves choosing the values of certain system
parameters and performing a schedulability analysis to
determine whether all deadline constraints can be
satisfied. If such an analysis returns a negative
answer, then some of the parameters are modified and
the analysis is invoked once again. This iteration is
repeated until a schedulable design is obtained.
However, the schedulability analysis problem for most
task models is intractable (usually co-NP hard) and,
hence, such an iterative design process is often very
expensive. To get around this problem, we introduce the
concept of ``interactive'' schedulability analysis. It
is based on the observation that if only a small number
of system parameters are changed, then it is not
necessary to rerun the full schedulability analysis
algorithm, thereby making the iterative design process
considerably faster. We refer to this analysis as being
``interactive'' because it is supposed to be run in an
interactive mode. This concept is fairly general and
can be applied to a wide variety of task models. In
this paper, we have chosen the recurring real-time task
model, because it can be used to represent realistic
applications from the embedded systems domain
(containing conditional branches and fine-grained
deadline constraints). Our experimental results show
that using our scheme can lead to more than $ 20 \times $
speedup for each invocation of the schedulability
analysis algorithm, compared to the case where the full
algorithm is run.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "interactive design; nonfunctional constraints;
performance debugging; recurring real-time task model;
schedulability analysis",
}
@Article{Ha:2008:IES,
author = "Soonhoi Ha and Kiyoung Choi and Taewhan Kim and
Krisztian Flautner and Sanglyul Min and Wang Yi",
title = "Introduction to {Embedded Systems Week 2006} special
issue",
journal = j-TECS,
volume = "7",
number = "2",
pages = "8:1--8:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331332",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2008:EAC,
  author = {Minyoung Kim and Sudarshan Banerjee and Nikil Dutt and
    Nalini Venkatasubramanian},
  title = {Energy-aware cosynthesis of real-time multimedia
    applications on {MPSoCs} using heterogeneous scheduling
    policies},
  journal = j-TECS,
  volume = {7},
  number = {2},
  pages = {9:1--9:??},
  month = feb,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1331331.1331333},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Real-time multimedia applications are increasingly
    being mapped onto MPSoC (multiprocessor system-on-chip)
    platforms containing hardware--software IPs
    (intellectual property), along with a library of common
    scheduling policies such as EDF, RM. The choice of a
    scheduling policy for each IP is a key decision that
    greatly affects the design's ability to meet real-time
    constraints, and also directly affects the energy
    consumed by the design. We present a cosynthesis
    framework for design space exploration that considers
    heterogeneous scheduling while mapping multimedia
    applications onto such MPSoCs. In our approach, we
    select a suitable scheduling policy for each IP such
    that system energy is minimized---our framework also
    includes energy-reduction techniques utilizing dynamic
    power management. Experimental results on a realistic
    multimode multimedia terminal application demonstrate
    that our approach enables us to select design points
    with up to 60.5\% reduced energy for a given area
    constraint, while meeting all real-time requirements.
    More importantly, our approach generates a tradeoff
    space between energy and cost allowing designers to
    comparatively evaluate multiple system level
    mappings.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {9},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {cosynthesis; energy; MPSoC; real-time scheduling},
}
@Article{Raman:2008:ASW,
  author = {Balaji Raman and Samarjit Chakraborty},
  title = {Application-specific workload shaping in
    multimedia-enabled personal mobile devices},
  journal = j-TECS,
  volume = {7},
  number = {2},
  pages = {10:1--10:??},
  month = feb,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1331331.1331334},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Today, most personal mobile devices (e.g., cell phones
    and PDAs) are multimedia-enabled and support a variety
    of concurrently running applications, such as
    audio/video players, word processors, and web browsers.
    Media-processing applications are often computationally
    expensive and most of these devices typically have
    100--400-MHz processors. As a result, the
    user-perceived application response times are often
    poor when multiple applications are concurrently fired.
    In this paper, we show that by using
    application-specific dynamic buffering techniques, the
    workload of these applications can be suitably
    ``shaped'' to fit the available processor bandwidth.
    Our techniques are analogous to traffic shaping, which
    is widely used in communication networks to optimally
    utilize network bandwidth. Such shaping techniques have
    recently attracted a lot of attention in the context of
    embedded systems design (e.g., for dynamic voltage
    scaling). However, they have not been exploited for
    enhanced schedulability of multiple applications, as we
    do in this paper.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {10},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {mobile devices; multimedia systems; schedulability
    analysis},
}
@Article{Egger:2008:DSM,
author = "Bernhard Egger and Jaejin Lee and Heonshik Shin",
title = "Dynamic scratchpad memory management for code in
portable systems with an {MMU}",
journal = j-TECS,
volume = "7",
number = "2",
pages = "11:1--11:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331335",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this work, we present a dynamic memory allocation
technique for a novel, horizontally partitioned memory
subsystem targeting contemporary embedded processors
with a memory management unit (MMU). We propose to
replace the on-chip instruction cache with a scratchpad
memory (SPM) and a small minicache. Serializing the
address translation with the actual memory access
enables the memory system to access either only the SPM
or the minicache. Independent of the SPM size and based
solely on profiling information, a postpass optimizer
classifies the code of an application binary into a
pageable and a cacheable code region. The latter is
placed at a fixed location in the external memory and
cached by the minicache. The former, the pageable code
region, is copied on demand to the SPM before
execution. Both the pageable code region and the SPM
are logically divided into pages the size of an MMU
memory page. Using the MMU's pagefault exception
mechanism, a runtime scratchpad memory manager (SPMM)
tracks page accesses and copies frequently executed
code pages to the SPM before they get executed. In
order to minimize the number of page transfers from the
external memory to the SPM, good code placement
techniques become more important with increasing sizes
of the MMU pages. We discuss code-grouping techniques
and provide an analysis of the effect of the MMU's page
size on execution time, energy consumption, and
external memory accesses. We show that by using the
data cache as a victim buffer for the SPM, significant
energy savings are possible. We evaluate our SPM
allocation strategy with fifteen applications,
including H.264, MP3, MPEG-4, and PGP. The proposed
memory system requires 8\% less die area compared to a
fully-cached configuration. On average, we achieve a
31\% improvement in runtime performance and a 35\%
reduction in energy consumption with an MMU page size
of 256 bytes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "code placement; compilers; heterogeneous memory;
paging; portable systems; postpass optimization;
scratchpad; victim cache; virtual memory",
}
@Article{Scholz:2008:MPB,
author = "Bernhard Scholz and Bernd Burgstaller and Jingling
Xue",
title = "Minimal placement of bank selection instructions for
partitioned memory architectures",
journal = j-TECS,
volume = "7",
number = "2",
pages = "12:1--12:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331336",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We have devised an algorithm for minimal placement of
bank selections in partitioned memory architectures.
This algorithm is parameterizable for a chosen metric,
such as speed, space, or energy. Bank switching is a
technique that increases the code and data memory in
microcontrollers without extending the address buses.
Given a program in which variables have been assigned
to data banks, we present a novel optimization
technique that minimizes the overhead of bank switching
through cost-effective placement of bank selection
instructions. The placement is controlled by a number
of different objectives, such as runtime, low power,
small code size or a combination of these parameters.
We have formulated the minimal placement of bank
selection instructions as a discrete optimization
problem that is mapped to a partitioned Boolean
quadratic programming (PBQP) problem. We implemented
the optimization as part of a PIC Microchip backend and
evaluated the approach for several optimization
objectives. Our benchmark suite comprises programs from
MiBench and DSPStone plus a microcontroller real-time
kernel and drivers for microcontroller hardware
devices. Our optimization achieved a reduction in
program memory space of between 2.7 and 18.2\%, and an
overall improvement with respect to instruction cycles
between 5.0 and 28.8\%. Our optimization achieved the
minimal solution for all benchmark programs. We
investigated the scalability of our approach toward the
requirements of future generations of microcontrollers.
This study was conducted as a worst-case analysis on
the entire MiBench suite. Our results show that our
optimization (1) scales well to larger numbers of
memory banks, (2) scales well to the larger problem
sizes that will become feasible with future
microcontrollers, and (3) achieves minimal placement
for more than 72\% of all functions from MiBench.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "bank selection; partitioned Boolean quadratic
programming; partitioned memory architectures",
}
@Article{Choi:2008:SHM,
author = "Yoonseo Choi and Hwansoo Han",
title = "Shared heap management for memory-limited {Java}
virtual machines",
journal = j-TECS,
volume = "7",
number = "2",
pages = "13:1--13:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331337",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "One scarce resource in embedded systems is memory.
Multitasking makes the lack of memory problem even
worse. Most current embedded systems, which do not
provide virtual memory, simply divide physical memory
and evenly assign contiguous memory chunks to multiple
applications. Such simple memory management can
frequently cause the lack of available memory for some
applications, while others are not using the full
amount of assigned memory. To overcome inefficiency in
current memory management, we present an efficient heap
management scheme that allows multiple applications to
share heap space. To reduce overall heap memory usage,
applications adaptively acquire subheaps out of shared
pool of memory and release surplus subheaps to shared
pool. As a result, applications see noncontiguous
multiple subheaps as a heap in their address space. We
target Java applications to implement our heap-sharing
scheme in the KVM from Sun Microsystems. To protect
fragmented heap space with a limited number of regions
in memory protection unit (MPU), we maintain only a
limited number of subheaps. We experimentally evaluate
our heap management scheme with J2ME MIDP applications.
Our static and dynamic schemes reduce heap memory
usage, on average, by 30 and 27\%, respectively. For
both schemes, overheads are kept low. The execution
times in our schemes are increased only by 0.01\% for
static scheme and 0.35\% for dynamic scheme, on
average.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "dynamic memory management; garbage collection; heap
sharing; memory protection unit",
}
@Article{So:2008:UHS,
author = "Hayden Kwok-Hay So and Robert Brodersen",
title = "A unified hardware\slash software runtime environment
for {FPGA}-based reconfigurable computers using
{BORPH}",
journal = j-TECS,
volume = "7",
number = "2",
pages = "14:1--14:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331338",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This paper explores the design and implementation of
BORPH, an operating system designed for FPGA-based
reconfigurable computers. Hardware designs execute as
normal UNIX processes under BORPH, having access to
standard OS services, such as file system support.
Hardware and software components of user designs may,
therefore, run as communicating processes within
BORPH's runtime environment. The familiar language
independent UNIX kernel interface facilitates easy
design reuse and rapid application development. To
develop hardware designs, a Simulink-based design flow
that integrates with BORPH is employed. Performances of
BORPH on two on-chip systems implemented on a BEE2
platform are compared.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "BORPH; FPGA; hardware process; reconfigurable
computers",
}
@Article{Caspi:2008:SPM,
author = "Paul Caspi and Norman Scaife and Christos Sofronis and
Stavros Tripakis",
title = "Semantics-preserving multitask implementation of
synchronous programs",
journal = j-TECS,
volume = "7",
number = "2",
pages = "15:1--15:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331339",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We study the implementation of a synchronous program
as a set of multiple tasks running on the same
computer, and scheduled by a real-time operating system
using some preemptive scheduling policy, such as fixed
priority or earliest-deadline first. Multitask
implementations are necessary, for instance, in
multiperiodic applications, when the worst-case
execution time of the program is larger than its
smallest period. In this case, a single-task
implementation violates the schedulability assumption
and, therefore, the synchrony hypothesis does not hold.
We are aiming at semantics-preserving implementations,
where, for a given input sequence, the output sequence
produced by the implementation is the same as that
produced by the original synchronous program, and this
under all possible executions of the implementation.
Straightforward implementation techniques are not
semantics-preserving. We present an intertask
communication protocol, called DBP, that is
semantics-preserving and memory-optimal. DBP guarantees
semantical preservation under all possible triggering
patterns of the synchronous program: thus, it is
applicable not only to time-, but also event-triggered
applications. DBP works under both fixed priority and
earliest-deadline first scheduling. DBP is a
nonblocking protocol based on the use of intermediate
buffers and manipulations of write-to/read-from
pointers to these buffers: these manipulations happen
upon arrivals, rather than executions of tasks, which
is a distinguishing feature of DBP. DBP is
memory-optimal in the sense that it uses as few buffers
as needed, for any given triggering pattern. In the
worst case, DBP requires, at most, $ N + 2 $ buffers
for each writer, where $N$ is the number of readers for
this writer.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "embedded software; model-based design; optimality;
preemptive scheduling; process communication;
semantical preservation; synchronous programming",
}
@Article{Liu:2008:HPP,
author = "Duo Liu and Zheng Chen and Bei Hua and Nenghai Yu and
Xinan Tang",
title = "High-performance packet classification algorithm for
multithreaded {IXP} network processor",
journal = j-TECS,
volume = "7",
number = "2",
pages = "16:1--16:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331340",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Packet classification is crucial for the Internet to
provide more value-added services and guaranteed
quality of service. Besides hardware-based solutions,
many software-based classification algorithms have been
proposed. However, classifying at 10 Gbps speed or
higher is a challenging problem and it is still one of
the performance bottlenecks in core routers. In
general, classification algorithms face the same
challenge of balancing between high classification
speed and low memory requirements. This paper proposes
a modified recursive flow classification (RFC)
algorithm, Bitmap-RFC, which significantly reduces the
memory requirements of RFC by applying a bitmap
compression technique. To speed up classifying speed,
we exploit the multithreaded architectural features in
various algorithm development stages from algorithm
design to algorithm implementation. As a result,
Bitmap-RFC strikes a good balance between speed and
space. It can significantly keep both high
classification speed and reduce memory space
consumption. This paper investigates the main NPU
software design aspects that have dramatic performance
impacts on any NPU-based implementations: memory space
reduction, instruction selection, data allocation, task
partitioning, and latency hiding. We experiment with an
architecture-aware design principle to guarantee the
high performance of the classification algorithm on an
NPU implementation. The experimental results show that
the Bitmap-RFC algorithm achieves 10 Gbps speed or
higher and has a good scalability on Intel IXP2800
NPU.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "architecture; embedded system design; multithreading;
network processor; packet classification; thread-level
parallelism",
}
@Article{Zhuo:2008:EED,
author = "Jianli Zhuo and Chaitali Chakrabarti",
title = "Energy-efficient dynamic task scheduling algorithms
for {DVS} systems",
journal = j-TECS,
volume = "7",
number = "2",
pages = "17:1--17:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331341",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Dynamic voltage scaling (DVS) is a well-known
low-power design technique that reduces the processor
energy by slowing down the DVS processor and stretching
the task execution time. However, in a DVS system
consisting of a DVS processor and multiple devices,
slowing down the processor increases the device energy
consumption and thereby the system-level energy
consumption. In this paper, we first use system-level
energy consideration to derive the ``optimal'' scaling
factor by which a task should be scaled if there are no
deadline constraints. Next, we develop dynamic
task-scheduling algorithms that make use of dynamic
processor utilization and optimal scaling factor to
determine the speed setting of a task. We present
algorithm duEDF, which reduces the CPU energy
consumption and algorithm duSYS and its reduced
preemption version, duSYS\_PC, which reduce the
system-level energy. Experimental results on the
video-phone task set show that when the CPU power is
dominant, algorithm duEDF results in up to 45\% energy
savings compared to the non-DVS case. When the CPU
power and device power are comparable, algorithms duSYS
and duSYS\_PC achieve up to 25\% energy saving compared
to CPU energy-efficient algorithm duEDF, and up to 12\%
energy saving over the non-DVS scheduling algorithm.
However, if the device power is large compared to the
CPU power, then we show that a DVS scheme does not
result in lowest energy. Finally, a comparison of the
performance of algorithms duSYS and duSYS\_PC show that
preemption control has minimal effect on system-level
energy reduction.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "DVS system; dynamic task scheduling; energy
minimization; optimal scaling factor; real time",
}
@Article{Lee:2008:DFR,
author = "Sheayun Lee and Insik Shin and Woonseok Kim and Insup
Lee and Sang Lyul Min",
title = "A design framework for real-time embedded systems with
code size and energy constraints",
journal = j-TECS,
volume = "7",
number = "2",
pages = "18:1--18:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331342",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Real-time embedded systems are typically constrained
in terms of three system performance criteria: space,
time, and energy. The performance requirements are
directly translated into constraints imposed on the
system's resources, such as code size, execution time,
and energy consumption. These resource constraints
often interact or even conflict with each other in a
complex manner, making it difficult for a system
developer to apply a well-defined design methodology in
developing a real-time embedded system. Motivated by
this observation, we propose a design framework that
can flexibly balance the tradeoff involving the
system's code size, execution time, and energy
consumption. Given a system specification and an
optimization criteria, the proposed technique generates
a set of design parameters in such a way that a system
cost function is minimized while the given resource
constraints are satisfied. Specifically, the technique
derives code generation decision for each task so that
a specific version of code is selected among a number
of different ones that have distinct characteristics in
terms of code size and execution time. In addition, the
design framework determines the voltage/frequency
setting for a variable voltage processor whose supply
voltage can be adjusted at runtime in order to minimize
the energy consumption while execution performance is
degraded accordingly. The proposed technique formulates
this design process as a constrained optimization
problem. We show that this optimization problem is
NP-hard and then provide a heuristic solution to it. We
show that these seemingly conflicting design goals can
be pursued by using a simple optimization algorithm
that works with a single optimization criteria.
Moreover, the optimization is driven by an abstract
system specification given by the system developer, so
that the system development process can be automated.
The results from our simulation show that the proposed
algorithm finds a solution that is close to the optimal
one with the average error smaller than 1.0\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "code size; embedded; energy; real-time; scheduling",
}
@Article{Manolache:2008:TMP,
author = "Sorin Manolache and Petru Eles and Zebo Peng",
title = "Task mapping and priority assignment for soft
real-time applications under deadline miss ratio
constraints",
journal = j-TECS,
volume = "7",
number = "2",
pages = "19:1--19:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331343",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Both analysis and design optimisation of real-time
systems has predominantly concentrated on considering
hard real-time constraints. For a large class of
applications, however, this is both unrealistic and
leads to unnecessarily expensive implementations. This
paper addresses the problem of task priority assignment
and task mapping in the context of multiprocessor
applications with stochastic execution times and in the
presence of constraints on the percentage of missed
deadlines. We propose a design space exploration
strategy together with a fast method for system
performance analysis. Experiments emphasize the
efficiency of the proposed analysis method and
optimisation heuristic in generating high-quality
implementations of soft real-time systems with
stochastic task execution times and constraints on
deadline miss ratios.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "mapping; priority assignment; schedulability analysis;
soft real-time systems; stochastic task execution
times",
}
@Article{Park:2008:SRB,
author = "Taejoon Park and Kang G. Shin",
title = "Secure routing based on distributed key sharing in
large-scale sensor networks",
journal = j-TECS,
volume = "7",
number = "2",
pages = "20:1--20:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331344",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Sensor networks, usually built with a large number of
small, low-cost sensor nodes, are characterized by
their large-scale and unattended deployment,
necessitating ``secure'' communications between nearby,
as well as remote, sensor nodes for their intended
applications and services. Key setup/sharing is crucial
to the protection of such applications/services from
attacks, but existing (public-key, cluster-based, or
pairwise) solutions become too expensive (hence,
inefficient) when the underlying applications/services
require communications between distant sensor nodes. To
remedy this inefficiency, we propose a novel
distributed key-sharing scheme, in which each
participating sensor node shares unique keys with a
small number of other sensor nodes---called distributed
key servers (DKSs)---chosen according to their
geographic distance and communication direction. Using
DKSs, we develop two secure routing protocols: (1)
secure geographic forwarding that delivers packets by
using a chain of DKS lookups, each secured with its own
key and forwarded geographically; and (2) key
establishment that creates a secure session between two
distant sensor nodes based solely on symmetric-ciphers.
These protocols enable low-cost, low-power sensors to
provide high-level security at a very low cost.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "attack tolerance; distributed key sharing and servers;
key establishment; large-scale sensor networks; secure
geographic forwarding",
}
@Article{Cho:2008:DNP,
author = "Young H. Cho and William H. Mangione-Smith",
title = "Deep network packet filter design for reconfigurable
devices",
journal = j-TECS,
volume = "7",
number = "2",
pages = "21:1--21:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331345",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Most network routers and switches provide some
protection against the network attacks. However, the
rapidly increasing amount of damages reported over the
past few years indicates the urgent need for tougher
security. Deep-packet inspection is one of the
solutions to capture packets that can not be identified
using the traditional methods. It uses a list of
signatures to scan the entire content of the packet,
providing the means to filter harmful packets out of
the network. Since one signature does not depend on the
other, the filtering process has a high degree of
parallelism. Most software and hardware deep-packet
filters that are in use today execute the tasks under
Von Neumann architecture. Such architecture can not
fully take advantage of the parallelism. For instance,
one of the most widely used network intrusion-detection
systems, Snort, configured with 845 patterns, running
on a dual 1-GHz Pentium III system, can sustain a
throughput of only 50 Mbps. The poor performance is
because of the fact that the processor is programmed to
execute several tasks sequentially instead of
simultaneously. We designed scalable deep-packet
filters on field-programmable gate arrays (FPGAs) to
search for all data-independent patterns
simultaneously. With FPGAs, we have the ability to
reprogram the filter when there are any changes to the
signature set. The smallest full-pattern matcher
implementation for the latest Snort NIDS fits in a
single 400k Xilinx FPGA (Spartan 3-XC3S400) with a
sustained throughput of 1.6 Gbps. Given a larger FPGA,
the design can scale linearly to support a greater
number of patterns, as well as higher data
throughput.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "firewall; network intrusion detection; string filter;
virus; worm",
}
@Article{Pasricha:2008:FEB,
author = "Sudeep Pasricha and Nikil Dutt and Mohamed
Ben-Romdhane",
title = "Fast exploration of bus-based communication
architectures at the {CCATB} abstraction",
journal = j-TECS,
volume = "7",
number = "2",
pages = "22:1--22:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1331331.1331346",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:00 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Currently, system-on-chip (SoC) designs are becoming
increasingly complex, with more and more components
being integrated into a single SoC design.
Communication between these components is increasingly
dominating critical system paths and frequently becomes
the source of performance bottlenecks. It, therefore,
becomes imperative for designers to explore the
communication space early in the design flow.
Traditionally, system designers have used Pin-Accurate
Bus Cycle Accurate (PA-BCA) models for early
communication space exploration. These models capture
all of the bus signals and strictly maintain cycle
accuracy, which is useful for reliable performance
exploration but results in slow simulation speeds for
complex designs, even when they are modeled using
high-level languages. Recently, there have been several
efforts to use the Transaction-Level Modeling (TLM)
paradigm for improving simulation performance in BCA
models. However, these transaction-based BCA (T-BCA)
models capture a lot of details that can be eliminated
when exploring communication architectures. In this
paper, we extend the TLM approach and propose a new
transaction-based modeling abstraction level (CCATB) to
explore the communication design space. Our abstraction
level bridges the gap between the TLM and BCA levels,
and yields an average performance speedup of 120\% over
PA-BCA and 67\% over T-BCA models, on average. The
CCATB models are not only faster to simulate, but also
extremely accurate and take less time to model compared
to both T-BCA and PA-BCA models. We describe the
mechanisms that produce the speedup in CCATB models and
also analyze how the achieved simulation speedup scales
with design complexity. To demonstrate the
effectiveness of using CCATB for exploration, we
present communication space exploration case studies
from the broadband communication and multimedia
application domains.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "communication architecture; on-chip bus; performance
exploration; system-on-chip; transaction-level
modeling",
}
@Article{DiNatale:2008:BOM,
author = "Marco {Di Natale} and Valerio Pappalardo",
title = "Buffer optimization in multitask implementations of
{Simulink} models",
journal = j-TECS,
volume = "7",
number = "3",
pages = "23:1--23:??",
month = apr,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1347375.1347376",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:21 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Automatic generation of a controller implementation
from a synchronous reactive model is among the best
practices for software development in the automotive
and aeronautics industry, because of the possibility of
simulation, model checking, and error-free
implementation. This paper discusses an algorithm for
optimizing the single-processor multitask
implementation of Simulink models with real-time
execution constraints, derived from the sampling rates
of the functional blocks. Existing code generation
tools enforce the addition of extra buffering and
latencies whenever there is a rate transition among
functional blocks. This work shows how timing analysis
can be used to find the cases in which additional
buffering and latency can be avoided, improving the
space and time performance of the application. The
proposed search algorithm allows finding a solution
with reduced and possibly minimal use of buffering even
for very high values of processor utilization.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "code generation; real-time programming;
schedulability; software models",
}
@Article{Trajkovic:2008:ISA,
author = "Jelena Trajkovic and Alexander V. Veidenbaum and Arun
Kejariwal",
title = "Improving {SDRAM} access energy efficiency for
low-power embedded systems",
journal = j-TECS,
volume = "7",
number = "3",
pages = "24:1--24:??",
month = apr,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1347375.1347377",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:21 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "DRAM (dynamic random-access memory) energy consumption
in low-power embedded systems can be very high,
exceeding that of the data cache or even that of the
processor. This paper presents and evaluates a scheme
for reducing the energy consumption of SDRAM
(synchronous DRAM) memory access by a combination of
techniques that take advantage of SDRAM energy
efficiencies in bank and row access. This is achieved
by using small, cachelike structures in the memory
controller to prefetch an additional cache block(s) on
SDRAM reads and to combine block writes to the same
SDRAM row. The results quantify the SDRAM energy
consumption of MiBench applications and demonstrate
significant savings in SDRAM energy consumption, 23\%,
on average, and reduction in the energy-delay product,
44\%, on average. The approach also improves
performance: the CPI is reduced by 26\%, on average.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "embedded processors and low power; fetch buffer;
SDRAM; write-combining buffer",
}
@Article{Varma:2008:AFS,
author = "Ankush Varma and Eric Debes and Igor Kozintsev and
Paul Klein and Bruce Jacob",
title = "Accurate and fast system-level power modeling: an
{XScale}-based case study",
journal = j-TECS,
volume = "7",
number = "3",
pages = "25:1--25:??",
month = apr,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1347375.1347378",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:21 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Accurate and fast system modeling is central to the
rapid design space exploration needed for
embedded-system design. With fast, complex SoCs playing
a central role in such systems, system designers have
come to require MIPS-range simulation speeds and
near-cycle accuracy. The sophisticated simulation
frameworks that have been developed for high-speed
system performance modeling do not address power
consumption, although it is a key design constraint. In
this paper, we define a simulation-based methodology
for extending system performance modeling frameworks to
also include power modeling. We demonstrate the use of
this methodology with a case study of a real, complex
embedded system, comprising the Intel XScale embedded
microprocessor, its WMMX SIMD coprocessor, L1 caches,
SDRAM, and the on-board address and data buses. We
describe detailed power models for each of these
components and validate them against physical
measurements from hardware, demonstrating that such
frameworks enable designers to model both power and
performance at high speeds without sacrificing
accuracy. Our results indicate that the power estimates
obtained are accurate within 5\% of physical
measurements from hardware, while simulation speeds
consistently exceed a million instructions per second
(MIPS).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "embedded systems; power modeling; SystemC",
}
@Article{Aamodt:2008:CTI,
author = "Tor M. Aamodt and Paul Chow",
title = "Compile-time and instruction-set methods for improving
floating- to fixed-point conversion accuracy",
journal = j-TECS,
volume = "7",
number = "3",
pages = "26:1--26:??",
month = apr,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1347375.1347379",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:21 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This paper proposes and evaluates compile time and
instruction-set techniques for improving the accuracy
of signal-processing algorithms run on fixed-point
embedded processors. These techniques are proposed in
the context of a profile guided floating- to
fixed-point compiler-based conversion process. A novel
fixed-point scaling algorithm (IRP) is introduced that
exploits correlations between values in a program by
applying fixed-point scaling, retaining as much
precision as possible without causing overflow. This
approach is extended into a more aggressive scaling
algorithm (IRP-SA) by leveraging the modulo nature of
2's complement addition and subtraction to discard most
significant bits that may not be redundant
sign-extension bits. A complementary scaling technique
(IDS) is then proposed that enables the fixed-point
scaling of a variable to be parameterized, depending
upon the context of its definitions and uses. Finally,
a novel instruction-set enhancement---fractional
multiplication with internal left shift (FMLS)---is
proposed to further leverage interoperand correlations
uncovered by the IRP-SA scaling algorithm. FMLS
preserves a different subset of the full product's bits
than traditional fractional fixed-point or integer
multiplication. On average, FMLS combined with IRP-SA
improves accuracy on processors with uniform bitwidth
register architectures by the equivalent of 0.61 bits
of additional precision for a set of signal-processing
benchmarks (up to 2 bits). Even without employing FMLS,
the IRP-SA scaling algorithm achieves additional
accuracy over two previous fixed-point scaling
algorithms by averages of 1.71 and 0.49 bits.
Furthermore, as FMLS combines multiplication with a
scaling shift, it reduces execution time by an average
of 9.8\%. An implementation of IDS, specialized to
single-nested loops, is found to improve accuracy of a
lattice filter benchmark by the equivalent of more than
16-bits of precision.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "compilation; digital signal processing; fixed-point;
fractional multiplication; scaling; signal-to-noise
ratio",
}
@article{Fei:2008:EAF,
  author = {Yunsi Fei and Lin Zhong and Niraj K. Jha},
  title = {An energy-aware framework for dynamic software
    management in mobile computing systems},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {27:1--27:??},
  month = apr,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1347375.1347380},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Energy efficiency has become a very important and
    challenging issue for resource-constrained mobile
    computers. In this article, we propose a novel dynamic
    software management (DSOM) framework to improve battery
    utilization. We have designed and implemented a DSOM
    module in user space, independent of the operating
    system (OS), which explores quality-of-service (QoS)
    adaptation to reduce system energy and employs a
    priority-based preemption policy for multiple
    applications to avoid competition for limited energy
    resources. Software energy macromodels for mobile
    applications are employed to predict energy demand at
    each QoS level, so that the DSOM module is able to
    select the best possible trade-off between energy
    conservation and application QoS; it also honors the
    priority desired by the user. Our experimental results
    for some mobile applications (video player, speech
    recognizer, voice-over-IP) show that this approach can
    meet user-specified task-oriented goals and
    significantly improve battery utilization.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {27},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {energy macromodel; runtime coordination; software
    adaptation},
}
@article{Zhong:2008:SWE,
  author = {Xiliang Zhong and Cheng-Zhong Xu},
  title = {System-wide energy minimization for real-time tasks:
    {Lower} bound and approximation},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {28:1--28:??},
  month = apr,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1347375.1347381},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We present a dynamic voltage scaling (DVS) technique
    that minimizes system-wide energy consumption for both
    periodic and sporadic tasks. It is known that a system
    consists of processors and a number of other
    components. Energy-aware processors can be run in
    different speed levels; components like memory and I/O
    subsystems and network interface cards can be in a
    standby state when they are active, but idle. Processor
    energy optimization solutions are not necessarily
    efficient from the perspective of systems. Current
    system-wide energy optimization studies are often
    limited to periodic tasks with heuristics in getting
    approximated solutions. In this paper, we develop an
    exact dynamic programming algorithm for periodic tasks
    on processors with practical discrete speed levels. The
    algorithm determines the lower bound of energy
    expenditure in pseudopolynomial time. An approximation
    algorithm is proposed to provide performance guarantee
    with a given bound in polynomial running time. Because
    of their time efficiency, both the optimization and
    approximation algorithms can be adapted for online
    scheduling of sporadic tasks with irregular task
    releases. We prove that system-wide energy optimization
    for sporadic tasks is NP-hard in the strong sense. We
    develop (pseudo-) polynomial-time solutions by
    exploiting its inherent properties.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {28},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {dynamic power management; dynamic voltage scaling;
    power-aware scheduling; real-time systems},
}
@article{Zhou:2008:CIA,
  author = {Ye Zhou and Edward A. Lee},
  title = {Causality interfaces for actor networks},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {29:1--29:??},
  month = apr,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1347375.1347382},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We consider concurrent models of computation where
    ``actors'' (components that are in charge of their own
    actions) communicate by exchanging messages. The
    interfaces of actors principally consist of ``ports,''
    which mediate the exchange of messages. Actor-oriented
    architectures contrast with and complement
    object-oriented models by emphasizing the exchange of
    data between concurrent components rather than
    transformation of state. Examples of such models of
    computation include the classical actor model,
    synchronous languages, data-flow models, process
    networks, and discrete-event models. Many experimental
    and production languages used to design embedded
    systems are actor oriented and based on one of these
    models of computation. Many of these models of
    computation benefit considerably from having access to
    causality information about the components. This paper
    augments the interfaces of such components to include
    such causality information. It shows how this causality
    information can be algebraically composed so that
    compositions of components acquire causality interfaces
    that are inferred from their components and the
    interconnections. We illustrate the use of these
    causality interfaces to statically analyze timed models
    and synchronous language compositions for causality
    loops and data-flow models for deadlock. We also show
    that causality analysis for each communication cycle
    can be performed independently and in parallel, and it
    is only necessary to analyze one port for each cycle.
    Finally, we give a conservative approximation technique
    for handling dynamically changing causality
    properties.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {29},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {actors; behavioral types; causality; data flow;
    deadlock; discrete-event models; interfaces;
    synchronous languages; timed systems},
}
@article{Shin:2008:CRT,
  author = {Insik Shin and Insup Lee},
  title = {Compositional real-time scheduling framework with
    periodic model},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {30:1--30:??},
  month = apr,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1347375.1347383},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {It is desirable to develop large complex systems using
    components based on systematic abstraction and
    composition. Our goal is to develop a compositional
    real-time scheduling framework to support abstraction
    and composition techniques for real-time aspects of
    components. In this paper, we present a formal
    description of compositional real-time scheduling
    problems, which are the component abstraction and
    composition problems. We identify issues that need be
    addressed by solutions and provide our framework for
    the solutions, which is based on the periodic
    interface. Specifically, we introduce the periodic
    resource model to characterize resource allocations
    provided to a single component. We present exact
    schedulability conditions for the standard Liu and
    Layland periodic task model and the proposed periodic
    resource model under EDF and RM scheduling, and we show
    that the component abstraction and composition problems
    can be addressed with periodic interfaces through the
    exact schedulability conditions. We also provide the
    utilization bounds of a periodic task set over the
    periodic resource model and the abstraction bounds of
    periodic interfaces for a periodic task set under EDF
    and RM scheduling. We finally present the analytical
    bounds of overheads that our solution incurs in terms
    of resource utilization increase and evaluate the
    overheads through simulations.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {30},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {abstract; component; composition; hierarchical;
    interface; real-time; scheduling},
}
@article{Voyiatzis:2008:SFS,
  author = {Artemios G. Voyiatzis and Dimitrios N. Serpanos},
  title = {The security of the {Fiat--Shamir} scheme in the
    presence of transient hardware faults},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {31:1--31:??},
  month = apr,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1347375.1347384},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Implementation cryptanalysis has emerged as a
    realistic threat for cryptographic systems. It consists
    of two classes of attacks: fault-injection and
    side-channel attacks. In this work, we examine the
    resistance of the Fiat--Shamir scheme to
    fault-injection attacks, since Fiat--Shamir is a
    popular scheme for ``light'' consumer devices, such as
    smartcards, in a wide range of consumer services. We
    prove that an existing attack, known as the Bellcore
    attack, is incomplete. We propose an extension to the
    protocol that proactively secures Fiat--Shamir systems
    from the Bellcore attack and we prove its strength.
    Finally, we introduce a new attack model, which, under
    stronger assumptions, can derive the secret keys from
    both the original Fiat--Shamir scheme as well as its
    proposed extension. Our approach demonstrates that
    countermeasures for implementation cryptanalysis must
    be carefully designed and that deployed systems must
    include appropriate protection mechanisms for all known
    attacks and be flexible enough to incorporate
    countermeasures for new ones.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {31},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {Bellcore attack; cryptography; Fiat--Shamir
    identification scheme; side-channel attacks;
    smartcards},
}
@article{Gurun:2008:NGP,
  author = {Selim Gurun and Chandra Krintz and Rich Wolski},
  title = {{NWSLite}: a general-purpose, nonparametric prediction
    utility for embedded systems},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {32:1--32:??},
  month = apr,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1347375.1347385},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Time series-based prediction methods have a wide range
    of uses in embedded systems. Many OS algorithms and
    applications require accurate prediction of demand and
    supply of resources. However, configuring prediction
    algorithms is not easy, since the dynamics of the
    underlying data requires continuous observation of the
    prediction error and dynamic adaptation of the
    parameters to achieve high accuracy. Current prediction
    methods are either too costly to implement on
    resource-constrained devices or their parameterization
    is static, making them inappropriate and inaccurate for
    a wide range of datasets. This paper presents NWSLite,
    a prediction utility that addresses these shortcomings
    on resource-restricted platforms.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {32},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {computation offloading; CPU availability estimation;
    embedded systems; network performance estimation;
    prediction algorithms},
}
@Article{Yan:2008:DOD,
author = "Ting Yan and Yu Gu and Tian He and John A. Stankovic",
title = "Design and optimization of distributed sensing
coverage in wireless sensor networks",
journal = j-TECS,
volume = "7",
number = "3",
pages = "33:1--33:??",
month = apr,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1347375.1347386",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:21 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "For many sensor network applications, such as military
surveillance, it is necessary to provide full sensing
coverage to a security-sensitive area while, at the
same time, minimizing energy consumption and extending
system lifetime by leveraging the redundant deployment
of sensor nodes. In this paper, we propose a
surveillance service for sensor networks based on a
distributed energy-efficient sensing coverage protocol.
In the protocol, each node is able to dynamically
decide a schedule for itself to guarantee a certain
degree-of-coverage (DOC) with average energy
consumption inversely proportional to the node density.
Several optimizations and extensions are proposed to
enhance the basic design with a better load-balance
feature and a longer network lifetime. We consider and
address the impact of the target size and the
unbalanced initial energy capacity of individual nodes
to the network lifetime. Several practical issues such
as the localization error, irregular sensing range, and
unreliable communication links are addressed as well.
Simulation shows that our protocol extends system
lifetime significantly with low energy consumption. It
outperforms other state-of-the-art schemes by as much
as 50\% reduction in energy consumption and as much as
130\% increase in the half-life of the network.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "energy conservation; sensing coverage; sensor
networks",
}
@article{Ozer:2008:SBE,
  author = {Emre {\"O}zer and Andy P. Nisbet and David Gregg},
  title = {A stochastic bitwidth estimation technique for compact
    and low-power custom processors},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {34:1--34:??},
  month = apr,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1347375.1347387},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {There is an increasing trend toward compiling from C
    to custom hardware for designing embedded systems in
    which the area and power consumption of
    application-specific functional units, registers, and
    memory blocks are heavily dependent on the bit-widths
    of integer operands used in computations. The actual
    bit-width required to store the values assigned to an
    integer variable during the execution of a program will
    not, in general, match the built-in C data types. Thus,
    precious area is wasted if the built-in data type sizes
    are used to declare the size of integer operands. In
    this paper, we introduce stochastic bit-width
    estimation that follows a simulation-based
    probabilistic approach to estimate the bit-widths of
    integer variables using extreme value theory. The
    estimation technique is also empirically compared to
    two compile-time integer bit-width analysis techniques.
    Our experimental results show that the stochastic
    bit-width estimation technique dramatically reduces
    integer bit-widths and, therefore, enables more compact
    and power-efficient custom hardware designs than the
    compile-time integer bit-width analysis techniques. Up
    to 37\% reduction in custom hardware area and 30\%
    reduction in logic power consumption using stochastic
    bit-width estimation can be attained over ten integer
    applications implemented on an FPGA chip.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {34},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {bit-width analysis; custom hardware; extreme value
    theory; FPGA; statistical estimation},
}
@article{Kumar:2008:CCP,
  author = {Rajeev Kumar and Dipankar Das},
  title = {Code compression for performance enhancement of
    variable-length embedded processors},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {35:1--35:??},
  month = apr,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1347375.1347388},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Most of the work done in the field of code compression
    pertains to processors with fixed-length instruction
    encoding. The design of a code-compression scheme for
    variable-length instruction encodings poses newer
    design challenges. In this work, we first investigate
    the scope for code compression on variable-length
    instruction-set processors whose encodings are already
    optimized to a certain extent with respect to their
    usage. For such ISAs instruction boundaries are not
    known prior to decoding. Another challenging task of
    designing a code-compression scheme for such ISAs is
    designing the decompression hardware, which must
    decompress code postcache so that we gain in
    performance. We present two dictionary-based code
    compression schemes. The first algorithm uses a
    bit-vector; the second one uses reserved instructions
    to identify code words. We design additional logic for
    each of the schemes to decompress the code on-the-fly.
    We test the two algorithms with a variable-length RISC
    processor. We provide a detailed experimental analysis
    of the empirical results obtained by extensive
    simulation-based design space exploration for this
    system. The optimized decompressor can now execute
    compressed program faster than the native program. The
    experiments demonstrate reduction in code size (up to
    30\%), speed-up (up to 15\%), and bus-switching
    activity (up to 20\%). We also implement one
    decompressor in a hardware description language and
    synthesize it to illustrate the small overheads
    associated with the proposed approach.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {35},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {bus switching; code compression; code decompression;
    embedded systems; instruction memory; RISC processor;
    variable-length ISAs},
}
@Article{Wilhelm:2008:WCE,
author = "Reinhard Wilhelm and Jakob Engblom and Andreas
Ermedahl and Niklas Holsti and Stephan Thesing and
David Whalley and Guillem Bernat and Christian
Ferdinand and Reinhold Heckmann and Tulika Mitra and
Frank Mueller and Isabelle Puaut and Peter Puschner and
Jan Staschulat and Per Stenstr{\"o}m",
title = "The worst-case execution-time problem---overview of
methods and survey of tools",
journal = j-TECS,
volume = "7",
number = "3",
pages = "36:1--36:??",
month = apr,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1347375.1347389",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 12 15:22:21 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The determination of upper bounds on execution times,
commonly called worst-case execution times (WCETs), is
a necessary step in the development and validation
process for hard real-time systems. This problem is
hard if the underlying processor architecture has
components, such as caches, pipelines, branch
prediction, and other speculative components. This
article describes different approaches to this problem
and surveys several commercially available tools and
research prototypes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "hard real time; worst-case execution times",
}
@Article{Hessell:2008:EES,
author = "Fabiano Hessel and Kenneth Kent and Dionisios
Pnevmatikatos",
title = "Editorial: {Embedded} systems --- new challenges and
future directions",
journal = j-TECS,
volume = "7",
number = "4",
pages = "37:1--37:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376804.1376805",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 5 19:32:59 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "First author's surname changed from ``Hessell'' to
``Hessel'' (to be confirmed against the publisher
record); the citation label is retained unchanged so
that existing citations continue to resolve.",
}
@article{Park:2008:RFF,
  author = {Chanik Park and Wonmoon Cheon and Jeonguk Kang and
    Kangho Roh and Wonhee Cho and Jin-Soo Kim},
  title = {A reconfigurable {FTL} (flash translation layer)
    architecture for {NAND} flash-based applications},
  journal = j-TECS,
  volume = {7},
  number = {4},
  pages = {38:1--38:??},
  month = jul,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1376804.1376806},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Aug 5 19:32:59 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In this article, a novel FTL (flash translation layer)
    architecture is proposed for NAND flash-based
    applications such as MP3 players, DSCs (digital still
    cameras) and SSDs (solid-state drives). Although the
    basic function of an FTL is to translate a logical
    sector address to a physical sector address in flash
    memory, efficient algorithms of an FTL have a
    significant impact on performance as well as the
    lifetime. After the dominant parameters that affect the
    performance and endurance are categorized, the design
    space of the FTL architecture is explored based on a
    diverse workload analysis. With the proposed FTL
    architectural framework, it is possible to decide which
    configuration of FTL mapping parameters yields the best
    performance, depending on the differing characteristics
    of various NAND flash-based applications.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {38},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {Flash memory; FTL; performance analysis;
    reconfigurable architecture},
}
@article{Popovici:2008:PBS,
  author = {Katalin Popovici and Xavier Guerin and Frederic
    Rousseau and Pier Stanislao Paolucci and Ahmed Amine
    Jerraya},
  title = {Platform-based software design flow for heterogeneous
    {MPSoC}},
  journal = j-TECS,
  volume = {7},
  number = {4},
  pages = {39:1--39:??},
  month = jul,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1376804.1376807},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Aug 5 19:32:59 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Current multimedia applications demand complex
    heterogeneous multiprocessor architectures with
    specific communication infrastructure in order to
    achieve the required performances. Programming these
    architectures usually results in writing separate
    low-level code for the different processors (DSP,
    microcontroller), implying late global validation of
    the overall application with the hardware platform. We
    propose a platform-based software design flow able to
    efficiently use the resources of the architecture and
    allowing easy experimentation of several mappings of
    the application onto the platform resources. We use a
    high-level environment to capture both application and
    architecture initial representations. An executable
    software stack is generated automatically for each
    processor from the initial model. The software
    generation and validation is performed gradually
    corresponding to different software abstraction levels.
    Specific software development platforms (abstract
    models of the architecture) are generated and used to
    allow debugging of the different software components
    with explicit hardware-software interaction. We applied
    this approach on a multimedia platform, involving a
    high performance DSP and a RISC processor, to explore
    communication architecture and generate an efficient
    executable code for a multimedia application. Based on
    automatic tools, the proposed flow increases
    productivity and preserves design quality.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {39},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {multimedia; Multiprocessor system-on chip; programming
    environment; Simulink; software design; SystemC;
    transaction level modeling},
}
@article{Chattopadhyay:2008:PPA,
  author = {A. Chattopadhyay and H. Ishebabi and X. Chen and Z.
    Rakosi and K. Karuri and D. Kammler and R. Leupers and
    G. Ascheid and H. Meyr},
  title = {Prefabrication and postfabrication architecture
    exploration for partially reconfigurable {VLIW}
    processors},
  journal = j-TECS,
  volume = {7},
  number = {4},
  pages = {40:1--40:??},
  month = jul,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1376804.1376808},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Aug 5 19:32:59 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Modern application-specific instruction-set processors
    (ASIPs) face the daunting task of delivering high
    performance for a wide range of applications. For
    enhancing the performance, architectural features, for
    example, pipelining, VLIW, are often employed in ASIPs,
    leading to high design complexity. Integrated ASIP
    design environments, like template-based approaches and
    language-driven approaches, provide an answer to this
    growing design complexity. At the same time, increasing
    hardware design costs have motivated the processor
    designers to introduce high flexibility in the
    processor. Flexibility, in its most effective form, can
    be introduced to the ASIP by coupling a reconfigurable
    unit to the base processor. Because of its obvious
    benefits, several reconfigurable ASIPs (rASIPs) have
    been designed for years. This design paradigm gained
    momentum with the advent of coarse-grained FPGAs, where
    the lack of domain-specific performance common in
    general-purpose FPGAs are largely overcome by choosing
    application-dependent basic functional units. These
    rASIP designs lack a generic flow from high-level
    specification, resulting in intuitive design decisions
    and hard-to-retarget processor design tools. Although
    partial, template-based approaches for rASIP design is
    existent, a clear design methodology especially for the
    prefabrication architecture exploration is not present.
    In order to address this issue, a high-level
    specification and design methodology for partially
    reconfigurable VLIW processors is proposed in this
    article. To show the benefit of this approach, a
    commercial VLIW processor is used as the base
    architecture and two domains of applications are
    studied for potential performance gain.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {40},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {ASIP; coarse-grained FPGA; VLIW},
}
@article{Lin:2008:MAC,
  author = {Yi-Neng Lin and Ying-Dar Lin and Yuan-Cheng Lai and
    Kuo-Kun Tseng},
  title = {Modeling and analysis of core-centric network
    processors},
  journal = j-TECS,
  volume = {7},
  number = {4},
  pages = {41:1--41:??},
  month = jul,
  year = {2008},
  coden = {????},
  doi = {https://doi.org/10.1145/1376804.1376809},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Aug 5 19:32:59 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Network processors can be categorized into two types,
    the coprocessors-centric model in which data-plane is
    handled by coprocessors, and the core-centric model in
    which the core processes most of the data-plane packets
    yet offloading some tasks to coprocessors. While the
    former has been properly explored over various
    applications, research regarding the latter remain
    limited. Based on the previous experience of
    prototyping the virtual private network (VPN) over the
    IXP425 network processor, this work aims to derive
    design implications for the core-centric model
    performing computational intensive applications. From
    system and IC vendors' perspectives, the
    continuous-time Markov chain and Petri net simulations
    are adopted to explore this architecture. Analytical
    results prove to be quite inline with those of the
    simulation and implementation. With subsequent
    investigation, we find that appropriate process run
    lengths can improve the effective core utilization by
    2.26 times, and by offloading the throughput boosts 7.5
    times. The results also suggest single-process
    programming, since context-switch overhead impacts
    considerably on the performance.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {41},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {core-centric; embedded system; modeling; Network
    processor; simulation},
}
@Article{Get:2008:PFE,
author = "Jerome Hugues and Bechir Zalila and Laurent Pautet and
Fabrice Kordon",
title = "From the prototype to the final embedded system using
the {Ocarina AADL} tool suite",
journal = j-TECS,
volume = "7",
number = "4",
pages = "42:1--42:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376804.1376810",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 5 19:32:59 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Building distributed real-time embedded systems
requires a stringent methodology, from early
requirement capture to full implementation. However,
there is a strong link between the requirements and the
final implementation (e.g., scheduling and resource
dimensioning). Therefore, a rapid prototyping process
based on automation of tedious and error-prone tasks
(analysis and code generation) is required to speed up
the development cycle. In this article, we show how the
AADL ({\em Architecture Analysis and Design
Language\/}), which appeared in late 2004, helps solve
these issues thanks to a dedicated tool suite. We then
detail the prototyping process and its current
implementation: Ocarina.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "AADL; distributed; DRE; embedded; Ocarina; PolyORB-HI;
real-time",
remark = "The source metadata appended a spurious ``Get'' to
the first three author names (presumably an affiliation
tag; to be confirmed), which caused them to be parsed
with surname ``Get''. The token has been removed from
the author list. The citation label is retained
unchanged so that existing citations continue to
resolve.",
}
@Article{Benveniste:2008:CHR,
author = "Albert Benveniste and Beno{\^\i}t Caillaud and Luca P.
Carloni and Paul Caspi and Alberto L.
Sangiovanni-Vincentelli",
title = "Composing heterogeneous reactive systems",
journal = j-TECS,
volume = "7",
number = "4",
pages = "43:1--43:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376804.1376811",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 5 19:32:59 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present a compositional theory of heterogeneous
reactive systems. The approach is based on the concept
of tags marking the events of the signals of a system.
Tags can be used for multiple purposes from indexing
evolution in time (time stamping) to expressing
relations among signals, like coordination (e.g.,
synchrony and asynchrony) and causal dependencies. The
theory provides flexibility in system modeling because
it can be used both as a unifying mathematical
framework to relate heterogeneous models of
computations and as a formal vehicle to implement
complex systems by combining heterogeneous components.
In particular, we introduce an algebra of tag
structures to define heterogeneous parallel composition
formally. Morphisms between tag structures are used to
define relationships between heterogeneous models at
different levels of abstraction. In particular, they
can be used to represent design transformations from
tightly synchronized specifications to
loosely-synchronized implementations. The theory has an
important application in the correct-by-construction
deployment of synchronous design on distributed
architectures.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "Compositionality; correct-by-construction design;
GALS; models of computation; reactive systems",
}
@Article{Gebotys:2008:EAW,
author = "Catherine H. Gebotys and Brian A. White",
title = "{EM} analysis of a wireless {Java}-based {PDA}",
journal = j-TECS,
volume = "7",
number = "4",
pages = "44:1--44:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376804.1376812",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 5 19:32:59 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The susceptibility of wireless portable devices to
electromagnetic (EM) attacks is largely unknown. If
analysis of electromagnetic (EM) waves emanating from
the wireless device during a cryptographic computation
do leak sufficient information, it may be possible for
an attacker to reconstruct the secret key. Possession
of the secret cryptographic key would render all future
wireless communications insecure and cause further
potential problems, such as identity theft. Despite the
complexities of a PDA wireless device, such as
operating system events, interrupts, cache misses, and
other interfering events, this article demonstrates
that, for the first time, repeatable EM differential
attacks are possible. The proposed differential
analysis methodology involves precharacterization of
the PDA device (thresholding and pattern recognition),
and a new frequency-based differential analysis. Unlike
previous research, the new methodology does not require
perfect alignment of EM frames and is repeatable in the
presence of a complex embedded system (including cache
misses, operating system events, etc.), thus supporting
attacks on real embedded systems. This research is
important for future wireless embedded systems, which
will increasingly demand higher levels of security.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "countermeasures; EM analysis; power attacks;
Side-channel analysis",
}
@Article{Ayav:2008:IFT,
author = "Tolga Ayav and Pascal Fradet and Alain Girault",
title = "Implementing fault-tolerance in real-time programs by
automatic program transformations",
journal = j-TECS,
volume = "7",
number = "4",
pages = "45:1--45:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376804.1376813",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 5 19:32:59 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present a formal approach to implement
fault-tolerance in real-time embedded systems. The
initial fault-intolerant system consists of a set of
independent periodic tasks scheduled onto a set of
fail-silent processors connected by a reliable
communication network. We transform the tasks such
that, assuming the availability of an additional spare
processor, the system tolerates one failure at a time
(transient or permanent). Failure detection is
implemented using heartbeating, and failure masking
using checkpointing and rollback. These techniques are
described and implemented by automatic program
transformations on the tasks' programs. The proposed
formal approach to fault-tolerance by program
transformations highlights the benefits of separation
of concerns. It allows us to establish correctness
properties and to compute optimal values of parameters
to minimize fault-tolerance overhead. We also present
an implementation of our method, to demonstrate its
feasibility and its efficiency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "checkpointing; correctness proofs; Fault-tolerance;
heartbeating; program transformations",
}
@Article{Middha:2008:MMS,
author = "Bhuvan Middha and Matthew Simpson and Rajeev Barua",
title = "{MTSS}: {Multitask} stack sharing for embedded
systems",
journal = j-TECS,
volume = "7",
number = "4",
pages = "46:1--46:??",
month = jul,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1376804.1376814",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 5 19:32:59 MDT 2008",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Out-of-memory errors are a serious source of
unreliability in most embedded systems. Applications
run out of main memory because of the frequent
difficulty of estimating the memory requirement before
deployment, either because it depends on input data, or
because certain language features prevent estimation.
The typical lack of disks and virtual memory in
embedded systems has a serious consequence when an
out-of-memory error occurs. Without swap space, the
system crashes if its memory footprint exceeds the
available memory by even 1 byte. This work improves
reliability for multitasking embedded systems by
proposing MTSS, a multitask stack sharing technique. If
a task attempts to overflow the bounds of its allocated
stack space, MTSS grows its stack into the stack memory
space allocated for other tasks. This technique can
avoid the out-of-memory error if the extra space
recovered is sufficient to complete execution.
Experiments show that MTSS is able to recover an
average of 54\% of the stack space allocated to the
overflowing task in the free space of other tasks. In
addition, unlike conventional systems, MTSS detects
memory overflows, allowing the possibility of remedial
action or a graceful exit if the recovered space is not
enough. Alternatively, MTSS can be used for decreasing
the required physical memory of an embedded system by
reducing the initial memory allocated to each of the
tasks and recovering the deficit by sharing stack with
other tasks. The overheads of MTSS are low: the runtime
and energy overheads are 3.1\% and 3.2\%, on average.
These are tolerable given that reliability is the most
important concern in virtually all systems, ahead of
other concerns, such as runtime and energy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "cactus stack; data compression; heap overflow; meshed
stack; Out-of-memory errors; reliability; reuse;
runtime checks; stack overflow; virtual memory",
}
@Article{Inoue:2008:FAC,
author = "Hiroaki Inoue and Junji Sakai and Sunao Torii and
Masato Edahiro",
title = "{FIDES}: an advanced chip multiprocessor platform for
secure next generation mobile terminals",
journal = j-TECS,
volume = "8",
number = "1",
pages = "1:1--1:??",
month = dec,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457246.1457247",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 6 14:36:01 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We propose a secure platform on a chip multiprocessor,
FIDES, in order to enable next generation mobile
terminals to execute downloaded native applications for
Linux. Its most important feature is the higher
security based on multigrained separation mechanisms.
Four new technologies support the FIDES platform: bus
filter logic, XIP kernels, policy separation, and
dynamic access control. With these technologies, the
FIDES platform can tolerate both application-level and
kernel-level bugs on an actual download subsystem.
Thus, the best-suited platform to secure next
generation mobile terminals is FIDES.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "chip multiprocessor; Secure mobile terminal; SELinux",
}
@Article{Park:2008:ATL,
author = "Taejoon Park and Kang G. Shin",
title = "Attack-tolerant localization via iterative
verification of locations in sensor networks",
journal = j-TECS,
volume = "8",
number = "1",
pages = "2:1--2:??",
month = dec,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457246.1457248",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 6 14:36:01 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In sensor networks, secure localization ---
determining sensors' locations in a hostile, untrusted
environment --- is a challenging, but very important,
problem that has not yet been addressed effectively.
This paper presents an attack-tolerant localization
protocol, called {\em Verification for Iterative
Localization\/} (VeIL), under which sensors
cooperatively safeguard the localization service. By
exploiting the high spatiotemporal correlation existing
between adjacent nodes, VeIL realizes (a) adaptive
management of a profile for normal localization
behavior, and (b) distributed detection of false
locations advertised by attackers by comparing them
against the profile of normal behavior. Our analysis
and simulation results show that VeIL achieves
high-level tolerance to many critical attacks, and is
computationally feasible on resource-limited sensors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "Anomaly detection; attack-tolerance; localization;
recursive least squares; sensor networks",
}
@Article{Mitra:2008:VAD,
author = "Sayan Mitra and Daniel Liberzon and Nancy Lynch",
title = "Verifying average dwell time of hybrid systems",
journal = j-TECS,
volume = "8",
number = "1",
pages = "3:1--3:??",
month = dec,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457246.1457249",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 6 14:36:01 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Average dwell time (ADT) properties characterize the
rate at which a hybrid system performs mode switches.
In this article, we present a set of techniques for
verifying ADT properties. The stability of a hybrid
system A can be verified by combining these techniques
with standard methods for checking stability of the
individual modes of A.\par
We introduce a new type of simulation relation for
hybrid automata --- {\em switching simulation\/} ---
for establishing that a given automaton A switches more
rapidly than another automaton B. We show that the
question of whether a given hybrid automaton has ADT
$ \tau_a $ can be answered either by
checking an invariant or by solving an optimization
problem. For classes of hybrid automata for which
invariants can be checked automatically, the
invariant-based method yields an automatic method for
verifying ADT; for automata that are outside this
class, the invariant has to be checked using inductive
techniques. The optimization-based method is automatic
and is applicable to a restricted class of initialized
hybrid automata. A solution of the optimization problem
either gives a counterexample execution that violates
the ADT property, or it confirms that the automaton
indeed satisfies the property. The optimization and the
invariant-based methods can be used in combination to
find the unknown ADT of a given hybrid automaton.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "Hybrid systems; optimization-based verification;
simulation relation",
}
@Article{Schirner:2008:QAS,
author = "Gunar Schirner and Rainer D{\"o}mer",
title = "Quantitative analysis of the speed\slash accuracy
trade-off in transaction level modeling",
journal = j-TECS,
volume = "8",
number = "1",
pages = "4:1--4:??",
month = dec,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457246.1457250",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 6 14:36:01 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The increasing complexity of embedded systems requires
modeling at higher levels of abstraction. Transaction
level modeling (TLM) has been proposed to abstract
communication for high-speed system simulation and
rapid design space exploration. Although being widely
accepted for its high performance and efficiency, TLM
often exhibits a significant loss in model
accuracy.\par
In this article, we systematically analyze and quantify
the speed/accuracy trade-off in TLM. To this end, we
provide a classification of TLM abstraction levels
based on model granularity and define appropriate
metrics and test setups to quantitatively measure and
compare the performance and accuracy of such
models.\par
Addressing several classes of embedded communication
protocols, we apply our analysis to three common bus
architectures, the industry-standard AMBA advanced
high-performance bus (AHB) as an on-chip parallel bus,
the controller area network (CAN) as an off-chip serial
bus, and the Motorola ColdFire Master Bus as an example
for a custom embedded processor bus.\par
Based on the analysis of these individual busses, we
then generalize our results for a broader conclusion.
The general TLM trade-off offers gains of up to four
orders of magnitude in simulation speed, generally
however, at the price of low accuracy. We conclude
further that model granularity is the key to efficient
TLM abstraction, and we identify conditions for
accuracy of abstract models. As a result, this article
provides general guidelines that allow the system
designer to navigate the TLM trade-off effectively and
choose the most suitable model for the given
application with fast and accurate results.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "System level design; system-on-chip; transaction level
modeling",
}
@Article{Zhou:2008:DAT,
author = "Xiangrong Zhou and Peter Petrov",
title = "Direct address translation for virtual memory in
energy-efficient embedded systems",
journal = j-TECS,
volume = "8",
number = "1",
pages = "5:1--5:??",
month = dec,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457246.1457251",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 6 14:36:01 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a methodology for virtual memory
support in energy-efficient embedded systems. A
holistic approach is proposed, where the combined
efforts of compiler, operating system, and hardware
architecture achieve significant system power
reductions. The application information extracted and
analyzed by the compiler is utilized dynamically by the
microarchitecture and the operating system to perform
energy-efficient and, for many memory references,
time-deterministic address translations. We demonstrate
that by using application information regarding virtual
memory layout, an efficient and conflict-free
translation process can be implemented through the
utilization of a small hardware direct translation
table (DTT) accessed in an application-specific manner.
The set of virtual pages is partitioned into groups,
such that for each group only a few of the least
significant bits are used as an index to obtain the
physical page number. We outline an efficient
compile-time algorithm for identifying these groups and
allocate their translation entries optimally into the
DTT. The introduced hardware is minimal in terms of
area, performance, and power overhead, while offering
the flexibility of software programmability. This is
achieved through a small set of registers and tables,
which are made software accessible. We have
quantitatively evaluated the proposed methodology on a
number of embedded applications, including voice,
image, and video processing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "address translation; Low-power embedded systems;
virtual memory",
}
@Article{Park:2008:QSL,
author = "Jiyong Park and Jaesoo Lee and Saehwa Kim and Seongsoo
Hong",
title = "Quasistatic shared libraries and {XIP} for memory
footprint reduction in {MMU}-less embedded systems",
journal = j-TECS,
volume = "8",
number = "1",
pages = "6:1--6:??",
month = dec,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457246.1457252",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 6 14:36:01 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Despite a rapid decrease in the price of solid state
memory devices, system memory is still a very precious
resource in embedded systems. The use of shared
libraries and execution-in-place (XIP) is known to be
effective in significantly reducing memory usage.
Unfortunately, many resource-constrained embedded
systems lack an MMU, making it extremely difficult to
support these techniques. To address this problem, we
propose a novel shared library technique called a
quasi-static shared library and an XIP, both based on
our enhanced position independent code technique. In
our quasistatic shared libraries, global symbols are
bound to pseudoaddresses at linking time and actual
physical addresses are bound at loading time. Unlike
conventional shared libraries, they do not require
symbol tables that take up valuable memory space and,
therefore, allow for expedited address translation at
runtime. Our XIP technique is facilitated by our
enhanced position independent code where a data section
can be arbitrarily located. Both the shared library and
XIP techniques are made possible by emulating an MMU's
memory mapping feature with a data section base
register (DSBR) and a data section base table
(DSBT).\par
We have implemented these proposed techniques in a
commercial ADSL (Asymmetric Digital Subscriber Line)
home network gateway equipped with an MMU-less ARM7TDMI
processor core, 2MB flash memory, and 16MB RAM. We
measured its memory usage and evaluated its performance
overhead by conducting a series of experiments. These
experiments clearly demonstrate the effectiveness of
our techniques in reducing memory usage. The results
are impressive: 35\% reduction in flash memory usage
when using only the shared library and 30\% reduction
in RAM usage when using the shared library and XIP
together. These results were achieved with only a
negligible performance penalty of less than 4\%. Even
though these techniques were applied to uClinux-based
embedded systems, they can be used for any MMU-less
real-time operating system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "Embedded systems; memory footprint reduction;
MMU-less; quasi-static linking; shared library",
}
@Article{Yan:2008:AWC,
author = "Jun Yan and Wei Zhang",
title = "Analyzing the worst-case execution time for
instruction caches with prefetching",
journal = j-TECS,
volume = "8",
number = "1",
pages = "7:1--7:??",
month = dec,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457246.1457253",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 6 14:36:01 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Time predictability is one of the most important
design considerations for real-time systems. In this
article, we study the impact of instruction prefetching
on the worst-case performance of instruction caches. We
extend the static cache simulation technique to model
and compute the worst-case instruction cache
performance with prefetching. The evaluation results
show that instruction prefetching can benefit both the
average-case and worst-case performance; however, the
degree of the worst-case performance improvement due to
instruction prefetching is less than that of the
average-case performance. As a result, the time
variation of computing is increased by instruction
prefetching. Also, our experimental results indicate
that the prefetching distance can significantly impact
the worst-case performance of instruction caches with
instruction prefetching. Specifically, when the
prefetching distance is equal to the L1 miss penalty,
the worst-case execution time with instruction
prefetching is minimized.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "hard real-time; instruction caches; instruction
prefetching; Worst-case execution time analysis",
}
@Article{Aaraj:2008:ADH,
author = "Najwa Aaraj and Anand Raghunathan and Niraj K. Jha",
title = "Analysis and design of a hardware\slash software
trusted platform module for embedded systems",
journal = j-TECS,
volume = "8",
number = "1",
pages = "8:1--8:??",
month = dec,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457246.1457254",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 6 14:36:01 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Trusted platforms have been proposed as a promising
approach to enhance the security of general-purpose
computing systems. However, for many
resource-constrained embedded systems, the size and
cost overheads of a separate Trusted Platform Module
(TPM) chip are not acceptable. One alternative is to
use a software-based TPM, which implements TPM
functions using software that executes in a protected
execution domain on the embedded processor itself.
However, since many embedded systems have limited
processing capabilities and are battery-powered, it is
also important to ensure that the computational and
energy requirements for SW-TPMs are acceptable.\par
In this article, we perform an evaluation of the energy
and execution time overheads for a SW-TPM
implementation on a handheld appliance (Sharp Zaurus
PDA). We characterize the execution time and energy
required by each TPM command through actual
measurements on the target platform. We observe that
for most commands, overheads are primarily due to the
use of 2,048-bit RSA operations that are performed
within the SW-TPM. In order to alleviate SW-TPM
overheads, we evaluate the use of Elliptic Curve
Cryptography (ECC) as a replacement for the RSA
algorithm specified in the Trusted Computing Group
(TCG) standards. In addition, we also evaluate the
overheads of using the SW-TPM in the context of various
end applications, including trusted boot of the Linux
operating system (OS), a secure VoIP client, and a
secure Web browser. Furthermore, we analyze the
computational workload involved in running SW-TPM
commands using ECC. We then present a suite of hardware
and software enhancements to accelerate these commands
--- generic custom instructions and exploitation of
parallel processing capabilities in multiprocessor
systems-on-chip (SoCs). We report results of evaluating
the proposed architectures on a commercial embedded
processor (Xtensa from Tensilica). Through uniprocessor
and multiprocessor optimizations, we could achieve
speed-ups of up to 5.71X for individual TPM commands.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "Custom instructions; embedded systems; multiprocessor
systems",
}
@Article{Suresh:2009:EEE,
author = "Dinesh C. Suresh and Banit Agrawal and Jun Yang and
Walid Najjar",
title = "Energy-efficient encoding techniques for off-chip data
buses",
journal = j-TECS,
volume = "8",
number = "2",
pages = "9:1--9:??",
month = jan,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457255.1457256",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 5 19:15:05 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Reducing the power consumption of computing devices
has gained a lot of attention recently. Many research
works have focused on reducing power consumption in the
off-chip buses as they consume a significant amount of
total power. Since the bus power consumption is
proportional to the switching activity, reducing the
bus switching is an effective way to reduce bus power.
While numerous techniques exist for reducing bus power
in address buses, only a handful of techniques have
been proposed for data-bus power reduction, where
frequent value encoding (FVE) is the best existing
scheme to reduce the transition activity on the data
buses.\par
In this article, we propose improved frequent value
data bus-encoding techniques aimed at reducing more
switching activity and, hence, power consumption. We
propose three new schemes and five new variations to
exploit bit-wise temporal and spatial locality in the
data-bus values. Our techniques just use one external
control signal and capture bit-wise locality to
efficiently encode data values. For all the embedded
and SPEC applications we tested, the overall average
switching reduction is 53\% over unencoded data and
10\% more than the conventional FVE scheme.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "bus switching; encoding; internal capacitances;
Low-power data buses",
}
@Article{Kejariwal:2009:ELL,
author = "Arun Kejariwal and Alexander V. Veidenbaum and
Alexandru Nicolau and Milind Girkar and Xinmin Tian and
Hideki Saito",
title = "On the exploitation of loop-level parallelism in
embedded applications",
journal = j-TECS,
volume = "8",
number = "2",
pages = "10:1--10:??",
month = jan,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457255.1457257",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 5 19:15:05 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Advances in the silicon technology have enabled
increasing support for hardware parallelism in embedded
processors. Vector units, multiple processors/cores,
multithreading, special-purpose accelerators such as
DSPs or cryptographic engines, or a combination of the
above have appeared in a number of processors. They
serve to address the increasing performance
requirements of modern embedded applications. To what
extent the available hardware parallelism can be
exploited is directly dependent on the amount of
parallelism inherent in the given application and the
congruence between the granularity of hardware and
application parallelism. This paper discusses how
loop-level parallelism in embedded applications can be
exploited in hardware and software. Specifically, it
evaluates the efficacy of automatic loop
parallelization and the performance potential of
different types of parallelism, viz., true thread-level
parallelism (TLP), speculative thread-level parallelism
and vector parallelism, when executing loops.
Additionally, it discusses the interaction between
parallelization and vectorization. Applications from
both the industry-standard EEMBC{\reg},$^1$ 1.1, EEMBC
2.0 and the academic MiBench embedded benchmark suites
are analyzed using the Intel{\reg}$^2$ C compiler. The
results show the performance that can be achieved today
on real hardware and using a production compiler,
provide upper bounds on the performance potential of
the different types of thread-level parallelism, and
point out a number of issues that need to be addressed
to improve performance. The latter include
parallelization of libraries such as libc and design of
parallel algorithms to allow maximal exploitation of
parallelism. The results also point to the need for
developing new benchmark suites more suitable to
parallel compilation and execution.\par
$^1$ Other names and brands may be claimed as the
property of others.\par
$^2$ Intel is a trademark of Intel Corporation or its
subsidiaries in the United States and other
countries.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "10",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "libraries; Multi-cores; multithreading; parallel
loops; programming models; system-on-chip (Soc);
thread-level speculation; vectorization",
}
@Article{Hashemi:2009:TDS,
author = "Matin Hashemi and Soheil Ghiasi",
title = "Throughput-driven synthesis of embedded software for
pipelined execution on multicore architectures",
journal = j-TECS,
volume = "8",
number = "2",
pages = "11:1--11:??",
month = jan,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1457255.1457258",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 5 19:15:05 MST 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present a methodology for pipelined software
synthesis of streaming applications. First, we develop
a versatile task assignment algorithm capable of
optimizing realistically-arbitrary cost functions for
two cores. The algorithm is exact (i.e., theoretically
optimal) contrary to existing heuristics. Second, our
approximation technique provides an adjustable knob to
trade solution quality with algorithm runtime and
memory. Third, we develop a recursive heuristic for
more cores. FPGA-based emulated experiments validate
our theoretical results. The exact algorithm yields
$ 1.7 \times $ throughput improvement. The
approximation method offers a range of tradeoff points
(e.g., $ 3 \times $ faster with $ 20 \times $ less
memory) while degrading the throughput only 1\% to
5\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "Embedded software; graph partitioning; multi-core
hardware; streaming applications; task assignment",
}
@article{Chattopadhyay:2009:PPA,
  author = {A. Chattopadhyay and H. Ishebabi and X. Chen and Z.
    Rakosi and K. Karuri and D. Kammler and R. Leupers and
    G. Ascheid and H. Meyr},
  title = {Pre- and postfabrication architecture exploration for
    partially reconfigurable {VLIW} processors},
  journal = j-TECS,
  volume = {8},
  number = {2},
  pages = {12:1--12:??},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457259},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Modern application-specific instruction-set processors
    (ASIPs) face the daunting task of delivering high
    performance for a wide range of applications. For
    enhancing the performance, architectural features
    (e.g., pipelining, VLIW) are often employed in ASIPs,
    leading to high design complexity. Integrated ASIP
    design environments like template-based approaches and
    language-driven approaches provide an answer to this
    growing design complexity. At the same time, increasing
    hardware design costs have motivated the processor
    designers to introduce high flexibility in the
    processor. Flexibility, in its most effective form, can
    be introduced to the ASIP by coupling a reconfigurable
    unit to the base processor. Due to its obvious
    benefits, several reconfigurable ASIPs (rASIPs) have
    been designed for years. This design paradigm gained
    momentum with the advent of coarse-grained FPGAs, where
    the lack of domain-specific performance common in
    general-purpose FPGAs are largely overcome by choosing
    application-dependent basic functional units. These
    rASIP designs lack a generic flow from high-level
    specification, resulting into intuitive design
    decisions and hard-to-retarget processor design tools.
    Although partial, template-based approaches for rASIP
    design is existent, a clear design methodology
    especially for the prefabrication architecture
    exploration is not present. In order to address this
    issue, a high-level specification and design
    methodology for partially reconfigurable VLIW
    processors is proposed in this article. To show the
    benefit of this approach a commercial VLIW processor is
    used as the base architecture and two domains of
    applications are studied for potential performance
    gain.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {12},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {ASIP; coarse-grained FPGA; VLIW},
}
@article{Lin:2009:MAC,
  author = {Yi-Neng Lin and Ying-Dar Lin and Kuo-Kun Tseng and
    Yuan-Cheng Lai},
  title = {Modeling and analysis of core-centric network
    processors},
  journal = j-TECS,
  volume = {8},
  number = {2},
  pages = {13:1--13:??},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457260},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Network processors can be categorized into two types,
    the coprocessors-centric model in which the data-plane
    is handled by coprocessors, and the core-centric model
    in which the core processes most of the data-plane
    packets yet offloading some tasks to coprocessors.
    While the former has been properly explored over
    various applications, researches regarding the latter
    remain limited. Based on the previous experience of
    prototyping the virtual private network (VPN) over the
    IXP425 network processor, this work aims to derive
    design implications for the core-centric model
    performing computational intensive applications. From
    system and IC vendors' perspectives, the
    continuous-time Markov chain and Petri net simulations
    are adopted to explore this architecture. Analytical
    results prove to be quite inline with those of the
    simulation and implementation. With subsequent
    investigation we find that appropriate process run
    lengths can improve the effective core utilization by
    2.26 times, and by offloading the throughput boosts 7.5
    times. The results also suggest single process
    programming since context switch overhead impacts
    considerably on the performance.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {13},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {core-centric; embedded system; modeling; Network
    processor; simulation},
}
@article{Zhou:2009:CLC,
  author = {Xiangrong Zhou and Peter Petrov},
  title = {Cross-layer customization for rapid and low-cost task
    preemption in multitasked embedded systems},
  journal = j-TECS,
  volume = {8},
  number = {2},
  pages = {14:1--14:??},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457261},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Preemptive multitasking is widely used in many
    low-cost and real-time embedded applications for its
    superior hardware utilization. The frequent and
    asynchronous context switches, however, require the
    preservation and restoration of the task state, thus
    resulting in a large number of memory transfer
    instructions. As a consequence, task responsiveness and
    application throughput can be significantly
    deteriorated. To address this problem we propose a
    cross-layer customization framework which through the
    close cooperation of compiler, OS, and hardware
    architecture achieves rapid and low-cost task switch.
    Application information extracted during compile-time
    regarding state liveness is exploited in order to
    preserve a minimal amount of task state on task
    preemption. We introduce two complementary techniques
    to implement the application-aware state preservation.
    The first technique utilizes compiler-generated custom
    routines which preserve/restore an extremely small live
    context at judiciously selected points in the
    application code. The second technique requires more
    sophisticated hardware support. It employs an
    OS-controlled register file mapping to achieve a rapid
    context switch. By mapping a small fraction of the
    register file in a single clock cycle, a context switch
    is achieved requiring no memory transfers for the
    majority of cases to preserve/restore the live state.
    The effect of aggressively replicated register files,
    where each task is given its own replica, is achieved
    with the hardware cost of only adding from 25\% to 50\%
    extra physical registers. Through the utilization of
    these novel mechanisms, a significant improvement on
    task response time is achieved as the context-switch
    cost is minimized.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {14},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@article{Auerbach:2009:LLT,
  author = {Joshua Auerbach and David F. Bacon and Daniel Iercan
    and Christoph M. Kirsch and V. T. Rajan and Harald
    R{\"o}ck and Rainer Trummer},
  title = {Low-latency time-portable real-time programming with
    {Exotasks}},
  journal = j-TECS,
  volume = {8},
  number = {2},
  pages = {15:1--15:??},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457262},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {{\em Exotasks\/} are a novel Java programming
    construct that achieve three important goals. They
    achieve low latency while allowing the fullest use of
    Java language features, compared to previous attempts
    to restrict the Java language for use in the
    submillisecond domain. They support pluggable
    schedulers, allowing easy implementation of new
    scheduling paradigms in a real-time Java system. They
    can achieve deterministic timing, even in the presence
    of other Java threads, and across changes of hardware
    and software platform. To achieve these goals, the
    program is divided into tasks with private heaps. Tasks
    may be strongly isolated, communicating only with each
    other and guaranteeing determinism, or weakly isolated,
    allowing some communication with the rest of the Java
    application. Scheduling of the tasks' execution,
    garbage collection, and value passing is accomplished
    by the pluggable scheduler. Schedulers that we have
    written employ logical execution time (LET) in
    association with strong isolation to achieve time
    portability. We have also built a quad-rotor model
    helicopter, the JAviator, which we use to evaluate our
    implementation of Exotasks in an experimental embedded
    version of IBM's J9 real-time virtual machine. Our
    experiments show that we are able to maintain very low
    scheduling jitter and deterministic behavior in the
    face of variations in both software load and hardware
    platform. We also show that Exotasks perform nearly as
    well as Eventrons on a benchmark audio application.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {15},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {Real-time scheduling; time portability; UAVs; virtual
    machine},
}
@article{Ahn:2009:RCT,
  author = {Minwook Ahn and Yunheung Paek},
  title = {Register coalescing techniques for heterogeneous
    register architecture with copy sifting},
  journal = j-TECS,
  volume = {8},
  number = {2},
  pages = {16:1--16:??},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457263},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Optimistic coalescing has been proven as an elegant
    and effective technique that provides better chances of
    safely coloring more registers in register allocation
    than other coalescing techniques. Its algorithm
    originally assumes homogeneous registers, which are all
    gathered in the same register file. Although this
    register architecture is still common in most
    general-purpose processors, embedded processors often
    contain heterogeneous registers, which are scattered in
    physically different register files dedicated for each
    dissimilar purpose and use. In this work, we show that
    optimistic coalescing is also useful for an embedded
    processor to better handle such heterogeneity of the
    register architecture, and developed a modified
    algorithm for optimal coalescing that helps a register
    allocator. In the experiment, an existing register
    allocator was able to achieve up to 13.0\% reduction in
    code size through our coalescing, and avoid many spills
    that would have been generated without our scheme.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {16},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {compiler; embedded processors; heterogeneous register
    architecture; Register allocation; register
    coalescing},
}
@article{Mozumdar:2009:CSP,
  author = {Mohammad Mostafizur Rahman Mozumdar and Luciano
    Lavagno and Laura Vanzago},
  title = {A comparison of software platforms for wireless sensor
    networks: {MANTIS}, {TinyOS}, and {ZigBee}},
  journal = j-TECS,
  volume = {8},
  number = {2},
  pages = {17:1--17:??},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457264},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Wireless sensor networks are characterized by very
    tight code size and power constraints and by a lack of
    well-established standard software development
    platforms such as Posix. In this article, we present a
    comparative study between a few fairly different such
    platforms, namely MANTIS, TinyOS, and ZigBee, when
    considering them from the application developer's
    perspective, that is, by focusing mostly on functional
    aspects, rather than on performance or code size. In
    other words, we compare both the tasking model used by
    these platforms and the API libraries they offer.
    Sensor network applications are basically event based,
    so most of the software platforms are also built on
    considering event handling mechanism, however some use
    a more traditional thread based model. In this article,
    we consider implementations of a simple generic
    application in MANTIS, TinyOS, and the Ember ZigBee
    development framework, with the goal of depicting major
    differences between these platforms, and suggesting a
    programming style aimed at maximizing portability
    between them.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {17},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {application porting; software platform; Wireless
    sensor networks},
}
@Article{Unnikrishnan:2009:RMR,
author = "P. Unnikrishnan and G. Chen and M. Kandemir and M.
Karakoy and I. Kolcu",
title = "Reducing memory requirements of resource-constrained
applications",
journal = j-TECS,
volume = "8",
number = "3",
pages = "17:1--17:??",
month = apr,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1509288.1509289",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Apr 21 16:29:24 MDT 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Embedded computing platforms are often resource
constrained, requiring great design and implementation
attention to memory-, power-, and heat-related
parameters. An important task for a compiler in such
platforms is to simplify the process of developing
applications for limited memory devices and
resource-constrained clients. Focusing on
array-intensive embedded applications to be executed on
single CPU-based architectures, this work explores how
loop-based compiler optimizations can be used for
increasing memory location reuse. Our goal is to
transform a given application in such a way that the
resulting code has fewer cases (as compared to the
original code), where the lifetimes of array elements
overlap. The reduction in lifetimes of array elements
can then be exploited by reusing memory locations as
much as possible. Our experimental results indicate
that the proposed strategy reduces data space
requirements of 15 resource constrained applications by
more than 40\%, on average. We also demonstrate how
this strategy can be combined with data locality (cache
behavior)--enhancing techniques so that a compiler can
take advantage of both, that is, reduce data memory
requirements and improve data locality at the same
time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "compilers; embedded system; lifetime; Memory; reuse",
}
@article{Weng:2009:AMN,
  author = {Ning Weng and Tilman Wolf},
  title = {Analytic modeling of network processors for parallel
    workload mapping},
  journal = j-TECS,
  volume = {8},
  number = {3},
  pages = {18:1--18:??},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509290},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Network processors are heterogeneous system-on-chip
    multiprocessors that are optimized to perform packet
    forwarding and processing tasks at Gigabit data rates.
    To meet the performance demands of increasing link
    speeds and complex network applications, network
    processors are implemented with several dozen embedded
    processor cores and hardware accelerators that run
    multiple packet processing applications in parallel.
    The parallel nature of the processing system makes it
    increasingly difficult for application developers to
    understand and manage resources and map processing
    tasks to the hardware. To address this problem, we
    present a methodology for profiling and analyzing
    network processor applications, mapping processing
    tasks to a generalized network processor architecture,
    and analytically determining the expected throughput
    performance. The key novelty of this work is not only
    the adaptation of application analysis and mapping
    algorithms to heterogeneous network processors, but
    also that the entire process can be automated and
    hidden from the application developer. Starting with
    the analysis of a uniprocessor implementation of the
    application, the process yields a mapping of the
    partitioned application that shows best performance for
    a given network processor system. The simplicity of the
    proposed randomized mapping algorithm allows the use of
    this methodology in network processor runtime systems
    where dynamic reallocation of tasks is necessary but
    processing power is limited. We present results that
    show the effectiveness of the analysis and mapping
    methodology as well as its application to design space
    exploration.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {18},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {Application profiling; embedded systems;
    multiprocessor scheduling; network processors},
}
@article{Tseng:2009:FSA,
  author = {Kuo-Kun Tseng and Yuan-Cheng Lai and Ying-Dar Lin and
    Tsern-Huei Lee},
  title = {A fast scalable automaton-matching accelerator for
    embedded content processors},
  journal = j-TECS,
  volume = {8},
  number = {3},
  pages = {19:1--19:??},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509291},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Home and office network gateways often employ a
    cost-effective embedded network processor to handle
    their network services. Such network gateways have
    received strong demand for applications dealing with
    intrusion detection, keyword blocking, antivirus and
    antispam. Accordingly, we were motivated to propose an
    appropriate fast scalable automaton-matching (FSAM)
    hardware to accelerate the embedded network processors.
    Although automaton matching algorithms are robust with
    deterministic matching time, there is still plenty of
    room for improving their average-case performance. FSAM
    employs novel prehash and root-index techniques to
    accelerate the matching for the nonroot states and the
    root state, respectively, in automation based hardware.
    The prehash approach uses some hashing functions to
    pretest the input substring for the nonroot states
    while the root-index approach handles multiple bytes in
    one single matching for the root state. Also, FSAM is
    applied in a prevalent automaton algorithm,
    Aho--Corasick (AC), which is often used in many
    content-filtering applications. When implemented in
    FPGA, FSAM can perform at the rate of 11.1Gbps with the
    pattern set of 32,634 bytes, demonstrating that our
    proposed approach can use a small logic circuit to
    achieve a competitive performance, although a larger
    memory is used. Furthermore, the amount of patterns in
    FSAM is not limited by the amount of internal circuits
    and memories. If the high-speed external memories are
    employed, FSAM can support up to 21,302 patterns while
    maintaining similar high performance.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {19},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {Aho--Corasick; automaton; Bloom filter; content
    filtering; String matching},
}
@article{Reshadi:2009:HCS,
  author = {Mehrdad Reshadi and Prabhat Mishra and Nikil Dutt},
  title = {Hybrid-compiled simulation: an efficient technique for
    instruction-set architecture simulation},
  journal = j-TECS,
  volume = {8},
  number = {3},
  pages = {20:1--20:??},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509292},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Instruction-set simulators are critical tools for the
    exploration and validation of new processor
    architectures. Due to the increasing complexity of
    architectures and time-to-market pressure, performance
    is the most important feature of an instruction-set
    simulator. Interpretive simulators are flexible but
    slow, whereas compiled simulators deliver speed at the
    cost of flexibility and compilation overhead. This
    article presents a hybrid instruction-set-compiled
    simulation (HISCS) technique for generation of fast
    instruction-set simulators that combines the benefit of
    both compiled and interpretive simulation. This article
    makes two important contributions: (i) it improves the
    interpretive simulation performance by applying
    compiled simulation at the instruction level using a
    novel template-customization technique to generate
    optimized decoded instructions during compile time; and
    (ii) it reduces the compile-time overhead by combining
    the benefits of both static and dynamic-compiled
    simulation. Our experimental results using two
    contemporary processors (ARM7 and SPARC) demonstrate an
    order-of-magnitude reduction in compilation time as
    well as a 70\% performance improvement, on average,
    over the best-known published result in instruction-set
    simulation.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {20},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {Compiled simulation; instruction set architecture;
    interpretive simulation; partial evaluation},
}
@article{Nguyen:2009:MAE,
  author = {Nghi Nguyen and Angel Dominguez and Rajeev Barua},
  title = {Memory allocation for embedded systems with a
    compile-time-unknown scratch-pad size},
  journal = j-TECS,
  volume = {8},
  number = {3},
  pages = {21:1--21:??},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509293},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article presents the first memory allocation
    scheme for embedded systems having a scratch-pad memory
    whose size is unknown at compile time. A scratch-pad
    memory (SPM) is a fast compiler-managed SRAM that
    replaces the hardware-managed cache. All existing
    memory allocation schemes for SPM require the SPM size
    to be known at compile time. Unfortunately, because of
    this constraint, the resulting executable is tied to
    that size of SPM and is not portable to other processor
    implementations having a different SPM size.
    Size-portable code is valuable when programs are
    downloaded during deployment either via a network or
    portable media. Code downloads are used for fixing bugs
    or for enhancing functionality. The presence of
    different SPM sizes in different devices is common
    because of the evolution in VLSI technology across
    years. The result is that SPM cannot be used in such
    situations with downloaded codes.\par
    To overcome this limitation, our work presents a
    compiler method whose resulting executable is portable
    across SPMs of any size. Our technique is to employ a
    customized installer software, which decides the SPM
    allocation just before the program's first run, since
    the SPM size can be discovered at that time. The
    installer then, based on the decided allocation,
    modifies the program executable accordingly. The
    resulting executable places frequently used objects in
    SPM, considering both code and data for placement. To
    keep the overhead low, much of the preprocessing for
    the allocation is done at compile time. Results show
    that our benchmarks average a 41\% speedup versus an
    all-DRAM allocation, while the optimal static
    allocation scheme, which knows the SPM size at compile
    time and is thus an unachievable upper-bound and is
    only slightly faster (45\% faster than all-DRAM).
    Results also show that the overhead from our customized
    installer averages about 1.5\% in code size, 2\% in
    runtime, and 3\% in compile time for our benchmarks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {21},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {compiler; data linked list; downloadable codes;
    embedded loading; embedded systems; Memory allocation;
    scratch-pad},
}
@Article{Lysecky:2009:DIM,
author = "Roman Lysecky and Frank Vahid",
title = "Design and implementation of a {MicroBlaze}-based warp
processor",
journal = j-TECS,
volume = "8",
number = "3",
pages = "22:1--22:??",
month = apr,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1509288.1509294",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Apr 21 16:29:24 MDT 2009",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "While soft processor cores provided by FPGA vendors
offer designers with increased flexibility, such
processors typically incur penalties in performance and
energy consumption compared to hard processor core
alternatives. The recently developed technology of warp
processing can help reduce those penalties. Warp
processing is the dynamic and transparent
transformation of critical software regions from
microprocessor execution to much faster circuit
execution on an FPGA. In this article, we describe an
implementation of a warp processor on a Xilinx
Virtex-II Pro and Spartan3 FPGAs incorporating one or
more MicroBlaze soft processor cores. We further
provide a detailed analysis of the energy overhead of
dynamically partitioning an application's kernels to
hardware executing within an FPGA. Considering an
implementation that periodically partitions the
executing application once every minute, a
MicroBlaze-based warp processor implemented on a
Spartan3 FPGA achieves average speedups of
$ 5.8 \times $ and energy reductions of 49\% compared
to the MicroBlaze soft processor core alone ---
providing competitive performance and energy
consumption compared to existing hard processor
cores.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
keywords = "configurable logic; dynamic optimization; FPGA;
hardware/software partitioning; just-in-time (JIT)
compilation; soft processor cores; Warp processors",
}
@article{Bai:2009:MME,
  author = {Lan S. Bai and Lei Yang and Robert P. Dick},
  title = {{MEMMU}: {Memory} expansion for {MMU}-less embedded
    systems},
  journal = j-TECS,
  volume = {8},
  number = {3},
  pages = {23:1--23:??},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509295},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Random access memory (RAM) is tightly constrained in
    the least expensive, lowest-power embedded systems such
    as sensor network nodes and portable consumer
    electronics. The most widely used sensor network nodes
    have only 4 to 10KB of RAM and do not contain memory
    management units (MMUs). It is difficult to implement
    complex applications under such tight memory
    constraints. Nonetheless, price and power-consumption
    constraints make it unlikely that increases in RAM in
    these systems will keep pace with the increasing memory
    requirements of applications.\par
    We propose the use of automated compile-time and
    runtime techniques to increase the amount of usable
    memory in MMU-less embedded systems. The proposed
    techniques do not increase hardware cost, and require
    few or no changes to existing applications. We have
    developed runtime library routines and compiler
    transformations to control and optimize the automatic
    migration of application data between compressed and
    uncompressed memory regions, as well as a fast
    compression algorithm well suited to this application.
    These techniques were experimentally evaluated on
    Crossbow TelosB sensor network nodes running a number
    of data-collection and signal-processing applications.
    Our results indicate that available memory can be
    increased by up to 50\% with less than 10\% performance
    degradation for most benchmarks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {23},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  keywords = {Data compression; embedded system; wireless sensor
    network},
}
@Article{Doblander:2009:NSF,
  author =       "Andreas Doblander and Andreas Zoufal and Bernhard
                 Rinner",
  title =        "A novel software framework for embedded multiprocessor
                 smart cameras",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "24:1--24:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1509288.1509296",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Distributed smart cameras (DSC) are an emerging
                 technology for a broad range of important applications
                 including smart rooms, surveillance, entertainment,
                 tracking, and motion analysis. By having access to many
                 views and through cooperation among the individual
                 cameras, these DSCs have the potential to realize many
                 more complex and challenging applications than
                 single-camera systems.\par
                 This article focuses on the system-level software
                 required for efficient streaming applications on single
                 smart cameras as well as on networks of DSCs. Embedded
                 platforms with limited resources do not provide
                 middleware services well known on general-purpose
                 platforms. Our software framework supports transparent
                 intra- and interprocessor communication while keeping
                 the memory and computation overhead very low. The
                 software framework is based on a publisher--subscriber
                 architecture and provides mechanisms for dynamically
                 loading and unloading software components as well as
                 for graceful degradation in case of software- and
                 hardware-related faults. The software framework has
                 been completely implemented and tested on our embedded
                 smart cameras consisting of an ARM-based network
                 processor and several digital signal processors. Two
                 case studies demonstrate the feasibility of our
                 approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "distributed embedded systems; fault tolerance;
                 publisher--subscriber; Smart cameras; video
                 surveillance",
}
@Article{Li:2009:ELC,
  author =       "Zhiyuan Li and Santosh Pande",
  title =        "Editorial: {Languages}, compilers, and tools for
                 embedded systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "25:1--25:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1550987.1550988",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Murray:2009:CTI,
  author =       "Alastair C. Murray and Richard V. Bennett and
                 Bj{\"o}rn Franke and Nigel Topham",
  title =        "Code transformation and instruction set extension",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "26:1--26:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1550987.1550989",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The demand for flexible embedded solutions and short
                 time-to-market has led to the development of extensible
                 processors that allow for customization through
                 user-defined instruction set extensions (ISEs). These
                 are usually identified from plain C sources. In this
                 article, we propose a combined exploration of code
                 transformations and ISE identification. The resulting
                 performance of such a combination has been measured on
                 two benchmark suites. Our results demonstrate that
                 combined code transformations and ISEs can yield
                 average performance improvements of 49\%. This
                 outperforms ISEs when applied in isolation, and in
                 extreme cases yields a speed-up of 2.85.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "ASIPs; compilers; Customizable processors; design
                 space exploration; instruction set extension;
                 source-level transformations",
}
@Article{Hu:2009:CAS,
  author =       "Jie Hu and Feihui Li and Vijay Degalahal and Mahmut
                 Kandemir and N. Vijaykrishnan and Mary J. Irwin",
  title =        "Compiler-assisted soft error detection under
                 performance and energy constraints in embedded
                 systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "27:1--27:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1550987.1550990",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Soft errors induced by terrestrial radiation are
                 becoming a significant concern in architectures
                 designed in newer technologies. If left undetected,
                 these errors can result in catastrophic consequences or
                 costly maintenance problems in different embedded
                 applications. In this article, we focus on utilizing
                 the compiler's help in duplicating instructions for
                 error detection in VLIW datapaths. The instruction
                 duplication mechanism is further supported by a
                 hardware enhancement for efficient result verification,
                 which avoids the need of additional comparison
                 instructions. In the proposed approach, the compiler
                 determines the instruction schedule by balancing the
                 permissible performance degradation and the energy
                 constraint with the required degree of duplication. Our
                 experimental results show that our algorithms allow the
                 designer to perform trade-off analysis between
                 performance, reliability, and energy consumption.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "compilers; Embedded systems; energy consumption;
                 instruction duplication; reliability; soft errors",
}
@Article{Jafari:2009:EPR,
  author =       "Roozbeh Jafari and Hassan Ghasemzadeh and Foad Dabiri
                 and Ani Nahapetian and Majid Sarrafzadeh",
  title =        "An efficient placement and routing technique for
                 fault-tolerant distributed embedded computing",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "28:1--28:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1550987.1550991",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents an efficient technique for
                 placement and routing of sensors/actuators and
                 processing units in a grid network. The driver
                 application that we present is a medical jacket, which
                 requires an extremely high level of robustness and
                 fault tolerance. The power consumption of such jacket
                 is another key technological constraint. Our proposed
                 interconnection network is a mesh of wires. A jacket
                 made of fabric and wires would be susceptible to
                 accidental damage via tears. By modeling the tears, we
                 evaluate the probability of having failures on every
                 segment of wires in our mesh interconnection network.
                 Then, we study two problems of placement and routing in
                 the sensor networks such that the fault tolerance is
                 maximized while the power consumption is minimized. We
                 develop efficient integer linear programming (ILP)
                 formulations to address these problems and perform both
                 placement and routing, simultaneously. This ensures
                 that the solution is a lower bound for both problems.
                 We evaluate the effectiveness of our proposed
                 techniques on a variety of benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Distributed embedded system; fault tolerance;
                 placement; routing; sensor networks",
}
@Article{Lee:2009:CIA,
  author =       "Edward A. Lee and Xiaojun Liu and Stephen
                 Neuendorffer",
  title =        "Classes and inheritance in actor-oriented design",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "29:1--29:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1550987.1550992",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Actor-oriented components emphasize concurrency and
                 temporal semantics and are used for modeling and
                 designing embedded software and hardware. Actors
                 interact with one another through ports via a messaging
                 schema that can follow any of several concurrent
                 semantics. Domain-specific actor-oriented languages and
                 frameworks are common (Simulink, LabVIEW, SystemC,
                 etc.). However, they lack many modularity and
                 abstraction mechanisms that programmers have become
                 accustomed to in object-oriented components, such as
                 classes, inheritance, interfaces, and polymorphism,
                 except as inherited from the host language. This
                 article shows a form that such mechanisms can take in
                 actor-oriented components, gives a formal structure,
                 and describes a prototype implementation. The
                 mechanisms support actor-oriented class definitions,
                 subclassing, inheritance, and overriding. The formal
                 structure imposes structural constraints on a model
                 (mainly the ``derivation invariant'') that lead to a
                 policy to govern inheritance. In particular, the
                 structural constraints permit a disciplined form of
                 multiple inheritance with unambiguous inheritance and
                 overriding behavior. The policy is based formally on a
                 generalized ultrametric space with some remarkable
                 properties. In this space, inheritance is favored when
                 actors are ``closer'' (in the generalized ultrametric),
                 and we show that when inheritance can occur from
                 multiple sources, one source is always unambiguously
                 closer than the other.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Actors; components; generalized ultrametric;
                 inheritance; interfaces; overriding; type systems",
}
@Article{Riccobene:2009:SCB,
  author =       "Elvinia Riccobene and Patrizia Scandurra and Sara
                 Bocchio and Alberto Rosti and Luigi Lavazza and Luigi
                 Mantellini",
  title =        "{SystemC\slash C-based} model-driven design for
                 embedded systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "30:1--30:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1550987.1550993",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article summarizes our effort, since 2004 up to
                 the present time, for improving the current industrial
                 Systems-on-Chip and Embedded Systems design by joining
                 the capabilities of the unified modeling language (UML)
                 and SystemC/C programming languages to operate at
                 system-level. The proposed approach exploits the OMG
                 model-driven architecture --- a framework for
                 Model-driven Engineering --- capabilities of reducing
                 abstract, coarse-grained and platform-independent
                 system models to fine-grained and platform-specific
                 models. We first defined a design methodology and a
                 development flow for the hardware, based on a SystemC
                 UML profile and encompassing different levels of
                 abstraction. We then included a multithread C UML
                 profile for modelling software applications. Both
                 SystemC/C profiles are consistent sets of modelling
                 constructs designed to lift the programming features
                 (both structural and behavioral) of the two coding
                 languages to the UML modeling level. The new codesign
                 flow is supported by an environment, which allows
                 system modeling at higher abstraction levels (from a
                 functional executable level to a register transfer
                 level) and supports automatic
                 code-generation/back-annotation from/to UML models.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "C; ES; MDE; SoC; SystemC; UML",
}
@Article{Bini:2009:MCE,
  author =       "Enrico Bini and Giorgio Buttazzo and Giuseppe Lipari",
  title =        "Minimizing {CPU} energy in real-time systems with
                 discrete speed management",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "31:1--31:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1550987.1550994",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a general framework to analyze
                 and design embedded systems minimizing the energy
                 consumption without violating timing requirements. A
                 set of realistic assumptions is considered in the model
                 in order to apply the results in practical real-time
                 applications. The processor is assumed to have as a set
                 of discrete operating modes, each characterized by
                 speed and power consumption. The energy overhead and
                 the transition delay incurred during mode switches are
                 considered. Task computation times are modeled with a
                 part that scales with the speed and a part having a
                 fixed duration, to take I/O operations into
                 account.\par
                 The proposed method allows to compute the optimal
                 sequence of voltage/speed changes that approximates the
                 minimum continuous speed, which guarantees the
                 feasibility of a given set of real-time tasks, without
                 violating the deadline constraints. The analysis is
                 performed both under fixed and dynamic priority
                 assignments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "CPU energy; Real-time systems",
}
@Article{Koo:2009:FTG,
  author =       "Heon-Mo Koo and Prabhat Mishra",
  title =        "Functional test generation using design and property
                 decomposition techniques",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "32:1--32:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1550987.1550995",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Functional verification of microprocessors is one of
                 the most complex and expensive tasks in the current
                 system-on-chip design methodology. Simulation using
                 functional test vectors is the most widely used form of
                 processor validation. A significant bottleneck in the
                 validation of such systems is the lack of automated
                 techniques for directed test generation. While existing
                 model checking--based approaches have proposed several
                 promising ideas for automated test generation, many
                 challenges remain in applying them to industrial
                 microprocessors. The time and resources required for
                 test generation using existing model checking--based
                 techniques can be prohibitively large. This article
                 presents an efficient test generation technique using
                 decompositional model checking. The contribution of the
                 article is the development of both property and design
                 decomposition procedures for efficient test generation
                 of pipelined processors. Our experimental results using
                 a multi-issue MIPS processor and an industrial
                 processor based on Power Architecture\TM{} Technology
                 demonstrate several orders-of-magnitude reduction in
                 validation effort by drastically reducing both test
                 generation time and test program length.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "design decomposition; functional validation; Model
                 checking; pipelined processor; property decomposition;
                 test generation",
}
@Article{Plaks:2009:GECa,
  author =       "Toomas P. Plaks and Neil Bergmann and Bernard
                 Pottier",
  title =        "Guest editorial {CAPA'08} configurable computing:
                 {Configuring} algorithms, processes, and architecture
                 issue {I}: {Configuring} algorithms and processes",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "1:1--1:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ferri:2009:RIF,
  author =       "B. H. Ferri and A. A. Ferri",
  title =        "Reconfiguration of {IIR} filters in response to
                 computer resource availability",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "2:1--2:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2009:TTD,
  author =       "Xiaojun Wang and Miriam Leeser",
  title =        "A truly two-dimensional systolic array {FPGA}
                 implementation of {QR} decomposition",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "3:1--3:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{DoCarmoLucas:2009:ADF,
  author =       "Amilcar {Do Carmo Lucas} and Henning Sahlbach and Sean
                 Whitty and Sven Heithecker and Rolf Ernst",
  title =        "Application development with the {FlexWAFE} real-time
                 stream processing architecture for {FPGAs}",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "4:1--4:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Nahapetian:2009:AAS,
  author =       "Ani Nahapetian and Philip Brisk and Soheil Ghiasi and
                 Majid Sarrafzadeh",
  title =        "An approximation algorithm for scheduling on
                 heterogeneous reconfigurable resources",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "5:1--5:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Patterson:2009:SMB,
  author =       "C. Patterson and P. Athanas and M. Shelburne and J.
                 Bowen and J. Sur{\'\i}s and T. Dunham and J. Rice",
  title =        "Slotless module-based reconfiguration of embedded
                 {FPGAs}",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "6:1--6:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Lloyd:2009:PSN,
  author =       "Scott Lloyd and Quinn Snell",
  title =        "A packet-switched network architecture for
                 reconfigurable computing",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "7:1--7:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Lubbers:2009:RMP,
  author =       "Enno L{\"u}bbers and Marco Platzner",
  title =        "{ReconOS}: {Multithreaded} programming for
                 reconfigurable computers",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "8:1--8:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2009:SFB,
  author =       "Jian Huang and Matthew Parris and Jooheung Lee and
                 Ronald F. DeMara",
  title =        "Scalable {FPGA}-based architecture for {DCT}
                 computation using dynamic partial reconfiguration",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "9:1--9:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Plaks:2009:GECb,
  author =       "Toomas P. Plaks and Neil Bergmann and Bernard
                 Pottier",
  title =        "Guest editorial {CAPA'08} configurable computing:
                 {Configuring} algorithms, processes, and architecture
                 issue {II}: {Configuring} hardware architecture",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "10:1--10:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Alle:2009:RRR,
  author =       "Mythri Alle and Keshavan Varadarajan and Alexander
                 Fell and Ramesh Reddy C. and Nimmy Joseph and Saptarsi
                 Das and Prasenjit Biswas and Jugantor Chetia and Adarsh
                 Rao and S. K. Nandy and Ranjani Narayan",
  title =        "{REDEFINE}: {Runtime} reconfigurable polymorphic
                 {ASIC}",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "11:1--11:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Banerjee:2009:FPU,
  author =       "Pritha Banerjee and Susmita Sur-Kolay and Arijit
                 Bishnu and Sandip Das and Subhas C. Nandy and Subhasis
                 Bhattacharjee",
  title =        "{FPGA} placement using space-filling curves: {Theory}
                 meets practice",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "12:1--12:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Beckett:2009:PSM,
  author =       "Paul Beckett",
  title =        "Power scalability in a mesh-connected reconfigurable
                 architecture",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "13:1--13:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhao:2009:STT,
  author =       "Weisheng Zhao and Eric Belhaire and Claude Chappert
                 and Pascale Mazoyer",
  title =        "Spin transfer torque {(STT)-MRAM--based} runtime
                 reconfiguration {FPGA} circuit",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "14:1--14:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2010:CPV,
  author =       "Hyung Sun Lee and Byung Kook Kim",
  title =        "Coscheduling of processor voltage and control task
                 period for energy-efficient control systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Reddy:2010:CPE,
  author =       "Rakesh Reddy and Peter Petrov",
  title =        "Cache partitioning for energy-efficient and
                 interference-free embedded multitasking",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Geelen:2010:MES,
  author =       "Bert Geelen and Vissarion Ferentinos and Francky
                 Catthoor and Gauthier Lafruit and Diederik Verkest and
                 Rudy Lauwereins and Thanos Stouraitis",
  title =        "Modeling and exploiting spatial locality trade-offs in
                 wavelet-based applications under varying resource
                 requirements",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Bueno:2010:ORA,
  author =       "David Bueno and Chris Conger and Alan D. George",
  title =        "Optimizing {RapidIO} architectures for onboard
                 processing",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "18:1--18:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Inoue:2010:RSC,
  author =       "Hiroaki Inoue and Junji Sakai and Masato Edahiro",
  title =        "A robust seamless communication architecture for
                 next-generation mobile terminals on multi-{CPU}
                 {SoCs}",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Manzanares:2010:CER,
  author =       "Adam Manzanares and Xiaojun Ruan and Shu Yin and Xiao
                 Qin and Adam Roth and Mais Najim",
  title =        "Conserving energy in real-time storage systems with
                 {I/O} burstiness",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "20:1--20:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Courbot:2010:EBD,
  author =       "Alexandre Courbot and Gilles Grimaud and Jean-Jacques
                 Vandewalle",
  title =        "Efficient off-board deployment and customization of
                 virtual machine-based embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "21:1--21:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Xue:2010:IRP,
  author =       "Chun Jason Xue and Jingtong Hu and Zili Shao and Edwin
                 Sha",
  title =        "Iterational retiming with partitioning: {Loop}
                 scheduling with complete memory latency hiding",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "22:1--22:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Cho:2010:LFS,
  author =       "Hyeonjoong Cho and Binoy Ravindran and E. Douglas
                 Jensen",
  title =        "Lock-free synchronization for dynamic embedded
                 real-time systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "23:1--23:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Musoll:2010:CEL,
  author =       "Enric Musoll",
  title =        "A cost-effective load-balancing policy for tile-based,
                 massive multi-core packet processors",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "24:1--24:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Guang:2010:HAM,
  author =       "Liang Guang and Ethiopia Nigussie and Pekka Rantala
                 and Jouni Isoaho and Hannu Tenhunen",
  title =        "Hierarchical agent monitoring design approach towards
                 self-aware parallel systems-on-chip",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "25:1--25:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{McLoughlin:2010:RTR,
  author =       "Ian Vince McLoughlin and Timo Rolf Bretschneider",
  title =        "Reliability through redundant parallelism for
                 micro-satellite computing",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "26:1--26:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Yang:2010:OMC,
  author =       "Lei Yang and Robert P. Dick and Haris Lekatsas and
                 Srimat Chakradhar",
  title =        "Online memory compression for embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "27:1--27:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Cesana:2010:MBM,
  author =       "Ulpian Cesana and Zhen He",
  title =        "Multi-buffer manager: {Energy-efficient} buffer
                 manager for databases on flash memory",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "28:1--28:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Tichy:2010:GAF,
  author =       "Milan Tichy and Jan Schier and David Gregg",
  title =        "{GSFAP} adaptive filtering using log arithmetic for
                 resource-constrained embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "29:1--29:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% NOTE: the abstract writes the multiplier as $ 2.5 \times $ because
%%% \times is a math-mode-only macro and fails when typeset in running text.
@Article{Yang:2010:HPO,
  author =       "Lei Yang and Robert P. Dick and Haris Lekatsas and
                 Srimat Chakradhar",
  title =        "High-performance operating system controlled online
                 memory compression",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "30:1--30:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721696",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Online memory compression is a technology that
                 increases the amount of memory available to
                 applications by dynamically compressing and
                 decompressing their working datasets on demand. It has
                 proven extremely useful in embedded systems with tight
                 physical RAM constraints. The technology can be used to
                 increase functionality, reduce size, and reduce cost,
                 without modifying applications or hardware. This
                 article presents a new software-based online memory
                 compression algorithm for embedded systems. In
                 comparison with the best algorithms used in online
                 memory compression, our new algorithm has a competitive
                 compression ratio but is twice as fast. In addition, we
                 describe several practical problems encountered in
                 developing an online memory compression infrastructure
                 and present solutions. We present a method of
                 adaptively managing the uncompressed and compressed
                 memory regions during application execution. This
                 memory management scheme adapts to the predicted memory
                 requirements of applications. It permits efficient
                 compression for a wide range of applications. We have
                 evaluated our techniques on a portable embedded device
                 and have found that the memory available to
                 applications can be increased by $ 2.5 \times $ with
                 negligible performance and power consumption penalties,
                 and with no changes to hardware or applications. Our
                 techniques allow existing applications to execute with
                 less physical memory. They also allow applications with
                 larger working datasets to execute on unchanged
                 embedded system hardware, thereby increasing
                 functionality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "compression; Embedded system; memory",
}
@Article{Wu:2010:SAF,
  author =       "Chin-Hsien Wu",
  title =        "A self-adjusting flash translation layer for
                 resource-limited embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "31:1--31:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721697",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The capacity of flash memory storage systems has been
                 growing at a speed similar to many other storage
                 systems. In order to properly manage the product cost,
                 vendors face serious challenges in resource-limited
                 embedded systems. In this article, a self-adjusting
                 flash translation layer is proposed with low memory
                 requirements. The objective of the design is to provide
                 efficient address mapping and low garbage collection
                 overhead, while controlling main memory usage of the
                 flash translation layer. The capability of the design
                 is evaluated over realistic workloads and benchmarks.
                 System performance is also guaranteed under low memory
                 requirements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "embedded systems; Flash memory; flash translation
                 layer; storage systems",
}
@Article{Irturk:2010:GAG,
  author =       "Ali Irturk and Bridget Benson and Shahnam Mirzaei and
                 Ryan Kastner",
  title =        "{GUSTO}: an automatic generation and optimization tool
                 for matrix inversion architectures",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721698",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Matrix inversion is a common function found in many
                 algorithms used in wireless communication systems. As
                 FPGAs become an increasingly attractive platform for
                 wireless communication, it is important to understand
                 the trade-offs in designing a matrix inversion core on
                 an FPGA. This article describes a matrix inversion core
                 generator tool, GUSTO, that we developed to ease the
                 design space exploration across different matrix
                 inversion architectures. GUSTO is the first tool of its
                 kind to provide automatic generation of a variety of
                 general-purpose matrix inversion architectures with
                 different parameterization options. GUSTO also provides
                 an optimized application-specific architecture with an
                 average of 59\% area decrease and 3X throughput
                 increase over its general-purpose architecture. The
                 optimized architectures generated by GUSTO provide
                 comparable results to published matrix inversion
                 architecture implementations, but offer the advantage
                 of providing the designer the ability to study the
                 trade-offs between architectures with different design
                 parameters.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "design space exploration; Field programmable gate
                 arrays (FPGAs); matrix inversion",
}
@Article{Yu:2010:FSB,
  author =       "Yue Yu and Shangping Ren and Ophir Frieder",
  title =        "Feasibility of semiring-based timing constraints",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721699",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Real-time and embedded applications often involve
                 different types of timing constraints, such as
                 precedence constraints and real-time constraints. As
                 real-time and embedded applications further advance,
                 new timing constraint types are emerging as well.
                 Recent research on interval-based timing constraints is
                 an example. Thus, it is important to have a uniformed
                 timing constraint representation so that a generalized
                 approach can be developed to analyze the variant
                 constraint types.\par
                 A semiring-based timing constraint model is introduced
                 to generalize the representations of different
                 constraint types. Under this model, we develop an
                 algorithm to check the satisfaction feasibility for a
                 given set of semiring-based timing constraints. This
                 algorithm provides better performance in the average
                 case as compared to applying the Bellman-Ford algorithm
                 directly on the constraint set.\par
                 In addition, for a set of feasible semiring-based
                 timing constraints, event occurrence points that
                 satisfy the constraint set form a (hyperdimension)
                 feasible region. For the given two sets of timing
                 constraints, we develop a necessary and sufficient
                 condition to testify whether the two constraint sets'
                 feasible regions have an inclusion relation. If one
                 feasible region is included in the other, we know that
                 the real-time event occurrences that satisfy the
                 included constraint set will necessarily satisfy the
                 including set.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "timing constraint feasibility analysis; Timing
                 constraints",
}
@Article{Tahaee:2010:PAP,
  author =       "Seyed-Abdoreza Tahaee and Amir Hossein Jahangir",
  title =        "A polynomial algorithm for partitioning problems",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "34:1--34:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721700",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article takes a theoretical approach to focus on
                 the algorithmic properties of hardware/software
                 partitioning. It proposes a method with polynomial
                 complexity to find the global optimum of an NP-hard
                 model partitioning problem for 75\% of occurrences
                 under some practical conditions. The global optimum is
                 approached with a lower bound distance for the
                 remaining 25\%. Furthermore, this approach ensures
                 finding the 2-approximate of the global optimum
                 partition in 97\% of instances where technical
                 assumptions exist. The strategy is based on
                 intelligently changing the parameters of the polynomial
                 model of the partitioning problem to force it to
                 produce (or approach) the exact solution to the NP-hard
                 model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "hardware/software codesign; maximum flow minimum cut
                 problem; NP-hard problems; Partitioning problem",
}
@Article{Peng:2010:OWZ,
  author =       "Huan-Kai Peng and Youn-Long Lin",
  title =        "An optimal warning-zone-length assignment algorithm
                 for real-time and multiple-{QoS} on-chip bus
                 arbitration",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "35:1--35:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721701",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In an advanced System-on-Chip (SoC) for real-time
                 applications, the arbiter of its on-chip communication
                 subsystem needs to support multiple QoS criteria while
                 providing a hard real-time guarantee. To fulfill both
                 objectives, the arbitration algorithm must dynamically
                 switch between NonReal-Time (NRT) and Real-Time (RT)
                 modes such that use of the RT mode is minimized to best
                 accommodate the overall QoS criteria. In this article,
                 we define a model for this problem, and propose optimal
                 solutions to its associated problems with static and
                 dynamic warning-zone-length assignment. Compared with
                 previous works, the proposed approach enables a bus
                 arbiter to use much less RT mode in providing a
                 Real-Time (RT) guarantee and, therefore, gives the
                 arbiter more opportunity to employ non-RT modes to
                 achieve better overall QoS. Experimental results show
                 that the proposed approach reduces RT mode usage by as
                 much as 37.1\%. Moreover, that reduction in RT mode
                 usage helps cut the execution time by 27.0\% when
                 applying our approach to an industrial DRAM controller.
                 Another case study on an AMBA-compliant
                 ultra-high-resolution H.264 decoder IP shows that the
                 proposed approach reduces RT mode usage by 26.4\%,
                 which leads to an average reduction of 10.4\% in
                 decoding time. Finally, when implementing a 16 master
                 arbiter, it costs only 6.9K and 9.5K gates of overhead
                 using the proposed static and dynamic approach,
                 respectively. Therefore, the proposed approach is
                 suitable for real-time SoC applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "on-chip communication; QoS; real-time scheduling;
                 System-on-Chip",
}
@Article{Schlich:2010:MCS,
  author =       "Bastian Schlich",
  title =        "Model checking of software for microcontrollers",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "36:1--36:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721702",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The interest of industries in model checking software
                 for microcontrollers is increasing. However, there are
                 currently no appropriate tools that can be applied by
                 embedded systems developers for the direct verification
                 of software for microcontrollers without the need for
                 manual modeling. This article describes a new approach
                 to model checking software for microcontrollers, which
                 verifies the assembly code of the software. The state
                 space is built using a tailored simulator, which
                 abstracts from time, handles nondeterminism, and
                 creates an overapproximation of the behavior shown by
                 the real microcontroller. Within this simulator, we
                 apply abstraction techniques to tackle the
                 state-explosion problem. In our approach, we combine
                 different formal methods, namely, model checking,
                 static analysis, and abstract interpretation. We also
                 combine explicit and symbolic model checking
                 techniques. This article presents a case study using
                 several programs to demonstrate the efficiency of the
                 applied abstraction techniques and to show the
                 applicability of this approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Assembly code; formal verification; microcontroller;
                 model checking; static analysis",
}
@Article{Bombieri:2010:SND,
  author =       "Nicola Bombieri and Franco Fummi and Davide Quaglia",
  title =        "System\slash network design-space exploration based on
                 {TLM} for networked embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "37:1--37:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721703",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a methodology for the design of
                 Networked Embedded Systems (NESs), which extends
                 Transaction Level Modeling (TLM) to perform
                 system/network design-space exploration. As a result, a
                 new design dimension is added to the traditional TLM
                 refinement process to represent network configuration
                 alternatives. Each network configuration can be used to
                 drive both architecture exploration and system
                 validation after each refinement step. A system/network
                 simulation taxonomy is investigated aiming at precisely
                 identifying the role of cosimulation in system/network
                 design-space exploration. Furthermore, a general
                 criterion to map functionalities to system and network
                 models is presented. As a case study, the proposed
                 methodology is applied to the design of a Voice-over-IP
                 client.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "networked embedded systems; Transaction level
                 modeling",
}
%%% NOTE(review): third author's given name corrected to "Xenofon" (the
%%% entry previously read "Xenefon") --- confirm against the publisher record.
@Article{Lin:2010:SSA,
  author =       "Chang Hong Lin and Marilyn Wolf and Xenofon Koutsoukos
                 and Sandeep Neema and Janos Sztipanovits",
  title =        "System and software architectures of distributed smart
                 cameras",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "38:1--38:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721704",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we describe a distributed,
                 peer-to-peer gesture recognition system along with a
                 software architecture modeling technique and authority
                 control protocol for ubiquitous cameras. This system
                 performs gesture recognition in real time by combining
                 imagery from multiple cameras without using a central
                 server. We propose a system architecture that uses a
                 network of inexpensive cameras to perform in-network
                 video processing. A methodology for transforming
                 well-designed single-node algorithm to distributed
                 system is also proposed. Applications for ubiquitous
                 cameras can be modeled as the composition of a
                 finite-state machine of the system, functional
                 services, and middleware. A service-oriented software
                 architecture is proposed to dynamically reconfigure
                 services when system state changes. By exchanging data
                 and control messages between neighboring sensors, each
                 node can maintain broader view of the environment with
                 integrated video-processing results. Our prototype
                 system is built on Windows machines, and uses standard
                 video cameras as sensors and local network as a
                 communication channel.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Distributed cameras; smart camera; software
                 architecture",
}
@Article{Zhou:2010:MMS,
  author =       "Gang Zhou and Yafeng Wu and Ting Yan and Tian He and
                 Chengdu Huang and John A. Stankovic and Tarek F.
                 Abdelzaher",
  title =        "A multifrequency {MAC} specially designed for wireless
                 sensor network applications",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "39:1--39:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721705",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multifrequency media access control has been well
                 understood in general wireless ad hoc networks, while
                 in wireless sensor networks, researchers still focus on
                 single frequency solutions. In wireless sensor
                 networks, each device is typically equipped with a
                 single radio transceiver and applications adopt much
                 smaller packet sizes compared to those in general
                 wireless ad hoc networks. Hence, the multifrequency MAC
                 protocols proposed for general wireless ad hoc networks
                 are not suitable for wireless sensor network
                 applications, which we further demonstrate through our
                 simulation experiments. In this article, we propose
                 MMSN, which takes advantage of multifrequency
                 availability while, at the same time, takes into
                 consideration the restrictions of wireless sensor
                 networks. In MMSN, four frequency assignment options
                 are provided to meet different application
                 requirements. A scalable media access is designed with
                 efficient broadcast support. Also, an optimal
                 nonuniform back-off algorithm is derived and its
                 lightweight approximation is implemented in MMSN, which
                 significantly reduces congestion in the time
                 synchronized media access design. Through extensive
                 experiments, MMSN exhibits the prominent ability to
                 utilize parallel transmissions among neighboring nodes.
                 When multiple physical frequencies are available, it
                 also achieves increased energy efficiency,
                 demonstrating the ability to work against radio
                 interference and the tolerance to a wide range of
                 measured time synchronization errors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "media access control; multi-channel; radio
                 interference; time synchronization; Wireless sensor
                 networks",
}
@Article{Jung:2010:SFS,
  author =       "Dawoon Jung and Jeong-Uk Kang and Heeseung Jo and
                 Jin-Soo Kim and Joonwon Lee",
  title =        "Superblock {FTL}: a superblock-based {Flash
                 Translation Layer} with a hybrid address translation
                 scheme",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "40:1--40:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721706",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In NAND flash-based storage systems, an intermediate
                 software layer called a Flash Translation Layer (FTL)
                 is usually employed to hide the erase-before-write
                 characteristics of NAND flash memory. We propose a
                 novel superblock-based FTL scheme, which combines a set
                 of adjacent logical blocks into a superblock. In the
                 proposed Superblock FTL, superblocks are mapped at
                 coarse granularity, while pages inside the superblock
                 are mapped freely at fine granularity to any location
                 in several physical blocks. To reduce extra storage and
                 flash memory operations, the fine-grain mapping
                 information is stored in the spare area of NAND flash
                 memory. This hybrid address translation scheme has the
                 flexibility provided by fine-grain address translation,
                 while reducing the memory overhead to the level of
                 coarse-grain address translation. Our experimental
                 results show that the proposed FTL scheme significantly
                 outperforms previous block-mapped FTL schemes with
                 roughly the same memory overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "FTL; hybrid address translation; NAND flash memory;
                 storage system",
}
@Article{Klues:2010:LLD,
  author =       "Kevin Klues and Guoliang Xing and Chenyang Lu",
  title =        "Link layer driver architecture for unified radio power
                 management in wireless sensor networks",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "41:1--41:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721707",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wireless Sensor Networks (WSNs) represent a new
                 generation of networked embedded systems that must
                 achieve long lifetimes on scarce amounts of energy.
                 Since radio communication accounts for the primary
                 source of power drain in these networks, a large number
                 of different radio power management protocols have been
                 proposed. However, the lack of operating system support
                 for flexibly integrating them with a diverse set of
                 applications and network platforms has made them
                 difficult to use. This article focuses on providing
                 link layer support toward realizing a unified power
                 management architecture (UPMA) for WSNs. In contrast to
                 existing monolithic approaches, we provide (i) a set of
                 standard interfaces that separate link layer power
                 management protocols from common MAC level
                 functionality, (ii) an architectural framework that
                 allows applications to easily swap out different
                 power-management protocols depending on its needs, and
                 (iii) a mechanism for coordinating multiple
                 applications with different power management
                 requirements. We have implemented our approach on both
                 the Mica2 and Telosb radio drivers in TinyOS-2.0, the
                 second generation of the de facto standard operating
                 system for WSNs. Microbenchmark results show that our
                 approach can coordinate the power-management
                 requirements of multiple applications in a platform
                 independent fashion while incurring negligible
                 overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "architecture; framework; radio power management;
                 Wireless sensor networks",
}
%%% NOTE: abstract quantities are spaced as "200 MHz" and "30 $ \mu $s" so
%%% that each number is separated from its unit and the unit is not split.
@Article{Lee:2010:IHM,
  author =       "Jupyung Lee and Kyu Ho Park",
  title =        "Interrupt handler migration and direct interrupt
                 scheduling for rapid scheduling of interrupt-driven
                 tasks",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "42:1--42:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721708",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we propose two techniques that aim to
                 minimize the scheduling latency of high-priority
                 interrupt-driven tasks, named the Interrupt Handler
                 Migration (IHM) and Direct Interrupt Scheduling (DIS).
                 The IHM allows the interrupt handler to be migrated
                 from the interrupt handler thread to the corresponding
                 target process so that additional context switch can be
                 avoided and the cache hit ratio with respect to the
                 data generated by the interrupt handler can be
                 improved. In addition, the DIS allows the shortest path
                 reserved for urgent interrupt-process pairs to be laid
                 between the interrupt arrival and target process by
                 dividing a series of interrupt-driven operations into
                 nondeferrable and deferrable operations. Both the IHM
                 and DIS can be combined in a natural way and can
                 operate concurrently. These techniques can be applied
                 to all kinds of interrupt handlers with no modification
                 to them. The proposed techniques not only reduce the
                 scheduling latency, but also resolve the
                 interrupt-driven priority inversion problem.\par
                 We implemented a prototype in the Linux 2.6.19 kernel
                 after adding real-time patches. Experimental results
                 show that the scheduling latency is significantly
                 reduced by up to 84.2\% when both techniques are
                 applied together. When the Linux OS runs on an
                 ARM-based embedded CPU running at 200 MHz, the
                 scheduling latency can become as low as 30 $ \mu $s,
                 which is much closer to the hardware-specific
                 limitations. By lowering the scheduling latency, the
                 limited CPU cycles can be consumed more for user-level
                 processes and less for system-level tasks, such as
                 interrupt handling and scheduling.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "interrupt handling; latency; Linux; Real-time
                 operating system; responsiveness; scheduling",
}
@Article{Tan:2010:MSE,
  author =       "Chiu C. Tan and Bo Sheng and Haodong Wang and Qun Li",
  title =        "{Microsearch}: a search engine for embedded devices
                 used in pervasive computing",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "43:1--43:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721709",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present Microsearch, a search
                 system suitable for embedded devices used in ubiquitous
                 computing environments. Akin to a desktop search
                 engine, Microsearch indexes the information inside a
                 small device, and accurately resolves a user's queries.
                 Given the limited hardware, conventional search engine
                 design and algorithms cannot be used. We adopt
                 Information Retrieval (IR) techniques for query
                 resolution, and proposed a new space-efficient top-$k$
                 query resolution algorithm. A theoretical model of
                 Microsearch is given to better understand the
                 trade-offs in design parameters. Evaluation is done via
                 actual implementation on off-the-shelf hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Embedded search engine; information retrieval;
                 pervasive computing",
}
@Article{Higuera-Toledano:2010:ISI,
  author          = "M. Teresa Higuera-Toledano and Doug Locke and Angelo
                     Corsaro",
  title           = "Introduction to special issue on {Java} technologies
                     for real-time and embedded systems",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "1:1--1:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814540",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "1",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{DosSantos:2010:MPB,
  author          = "Osmar Marchi {Dos Santos} and Andy Wellings",
  title           = "Measuring and policing blocking times in real-time
                     systems",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "2:1--2:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814541",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "In real-time systems, the execution-time overrun of a
                     thread may lead to a deadline being missed by the
                     thread or even others threads in the system. From a
                     fault tolerance perspective, both execution time
                     overruns and deadline misses can be considered timing
                     errors that could potentially cause a failure in the
                     system's ability to deliver its services in a timely
                     manner. In this context, the ideal is to detect the
                     error in the system as soon as possible, so that the
                     propagation of the error can be limited and error
                     recovery strategies can take place with more accurate
                     information. The run-time support mechanism usually
                     deployed for monitoring the timing requirements of
                     real-time systems is based on deadline monitoring, that
                     is, the system calls specific application code whenever
                     a deadline is violated. Recognizing that deadline
                     monitoring may not be enough for providing an adequate
                     level of fault tolerance for timing errors, major
                     real-time programming standards, like Ada, POSIX and
                     the Real-Time Specification for Java (RTSJ), have
                     proposed different mechanisms for monitoring the
                     execution time of threads. Nevertheless, in order to
                     provide a complete fault tolerance approach for timing
                     errors, the potential blocking time of threads also has
                     to be monitored. In this article, we propose mechanisms
                     for measuring and policing the blocking time of threads
                     in the context of both {\em basic priority
                     inheritance\/} and {\em priority ceiling protocols}.
                     The notion of {\em blocking-time clocks and timers\/}
                     for the POSIX standard is proposed, implemented and
                     evaluated in the open-source real-time operating system
                     MaRTE OS. Also, a {\em blocking time monitoring
                     model\/} for measuring and policing blocking times in
                     the RTSJ framework is specified. This model is
                     implemented and evaluated in the (RTSJ-compliant)
                     open-source middleware jRate, running on top of MaRTE
                     OS.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "2",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "error detection; POSIX standard; Real-time
                     specification for Java; timing errors",
}
@Article{Zerzelidis:2010:FFS,
  author          = "Alexandros Zerzelidis and Andy Wellings",
  title           = "A framework for flexible scheduling in the {RTSJ}",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "3:1--3:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814542",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "This article presents a viable solution to introducing
                     flexible scheduling in the Real-Time specification for
                     Java (RTSJ), in the form of a flexible scheduling
                     framework. The framework allows the concurrent use of
                     multiple application-defined scheduling policies, each
                     scheduling a subset of the total set of threads.
                     Moreover, all threads, regardless of the policy under
                     which they are scheduled, are permitted to share common
                     resources. Thus, the framework can accommodate a
                     variety of interworking applications (soft, firm, and
                     hard) running under the RTSJ. The proposed approach is
                     a two-level scheduling framework, where the first level
                     is the RTSJ priority scheduler and the second level is
                     under application control. This article describes the
                     framework's protocol, examines the different types of
                     scheduling policies that can be supported, and
                     evaluates the proposed framework by measuring its
                     execution cost. A description of an application-defined
                     Earliest-Deadline-First (EDF) scheduler illustrates how
                     the interface can be used. Minimum backward-compatible
                     changes to the RTSJ specification are discussed to
                     motivate the required interface. The only assumptions
                     made about the underlying real-time operating system is
                     that it supports preemptive priority-based dispatching
                     of threads and that changes to priorities have
                     immediate effect.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "3",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "flexible scheduling; RTSJ; Scheduling framework",
}
@Article{Spring:2010:RAI,
  author          = "Jesper Honig Spring and Filip Pizlo and Jean Privat
                     and Rachid Guerraoui and Jan Vitek",
  title           = "{Reflexes}: {Abstractions} for integrating highly
                     responsive tasks into {Java} applications",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "4:1--4:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814543",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/csharp.bib;
                     https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Achieving submillisecond response times in a managed
                     language environment such as Java or C\# requires
                     overcoming significant challenges. In this article, we
                     propose Reflexes, a programming model and runtime
                     system infrastructure that lets developers seamlessly
                     mix highly responsive tasks and timing-oblivious Java
                     applications. Thus enabling gradual addition of
                     real-time features, to a non-real-time application
                     without having to resort to recoding the real-time
                     parts in a different language such as C or Ada.
                     Experiments with the Reflex prototype implementation
                     show that it is possible to run a real-time task with a
                     period of 45$ \mu $s with an accuracy of 99.996\%
                     (only 0.001\% worse than the corresponding C
                     implementation) in the presence of garbage collection
                     and heavy load ordinary Java threads.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "4",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "Java virtual machine; memory management; Real-time
                     systems",
}
@Article{Kim:2010:EAE,
  author          = "Minseong Kim and Andy Wellings",
  title           = "Efficient asynchronous event handling in the real-time
                     specification for {Java}",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "5:1--5:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814544",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The Real-Time Specification for Java (RTSJ) is
                     becoming mature. It has been implemented, formed the
                     basis for research and used in serious applications.
                     Some strengths and weaknesses are emerging. One of the
                     areas that requires further elaboration is asynchronous
                     event handling (AEH). The primary goal for handlers in
                     the RTSJ is to have a lightweight concurrency
                     mechanism. Some implementation will, however, simply
                     map a handler to a real-time thread and this results in
                     undermining the original motivations and introduces
                     performance penalties. However it is generally unclear
                     how to map handlers to real-time threads effectively.
                     Also the support for nonblocking handlers in the RTSJ
                     is criticized as lacking in configurability as
                     implementations are unable to take advantage of them.
                     This article, therefore, examines the AEH techniques
                     used in some popular RTSJ implementations and proposes
                     two efficient AEH models for the RTSJ. We then define
                     formal models of the RTSJ AEH implementations using the
                     automata formalism provided by the UPPAAL model
                     checking tool. Using the automata models, their
                     properties are explored and verified. In the proposed
                     models, blocking and nonblocking handlers are serviced
                     by different algorithms. In this way, it is possible to
                     assign a real-time thread to a handler at the right
                     time in the right place while maintaining the fewest
                     possible threads overall and to give a certain level of
                     configurability to AEH. We also have implemented the
                     proposed models on an existing RTSJ implementation,
                     jRate and executed a set of performance tests that
                     measure their respective dispatch and multiple-handler
                     completion latencies. The results from the tests and
                     the verifications indicate that the proposed models
                     require fewer threads on average with better
                     performance than other approaches.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "5",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "asynchronous event handling; blocking handler;
                     multiple-server switching phenomenon; nonblocking
                     handler; RTSJ",
}
@Article{Schoeberl:2010:NRT,
  author          = "Martin Schoeberl and Wolfgang Puffitsch",
  title           = "Nonblocking real-time garbage collection",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "6:1--6:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814545",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "A real-time garbage collector has to fulfill two basic
                     properties: ensure that programs with bounded
                     allocation rates do not run out of memory and provide
                     short blocking times. Even for incremental garbage
                     collectors, two major sources of blocking exist,
                     namely, root scanning and heap compaction. Finding root
                     nodes of an object graph is an integral part of tracing
                     garbage collectors and cannot be circumvented. Heap
                     compaction is necessary to avoid probably unbounded
                     heap fragmentation, which in turn would lead to
                     unacceptably high memory consumption. In this article,
                     we propose solutions to both issues.\par
                     Thread stacks are local to a thread, and root scanning,
                     therefore, only needs to be atomic with respect to the
                     thread whose stack is scanned. This fact can be
                     utilized by either blocking only the thread whose stack
                     is scanned, or by delegating the responsibility for
                     root scanning to the application threads. The latter
                     solution eliminates blocking due to root scanning
                     completely. The impact of this solution on the
                     execution time of a garbage collector is shown for two
                     different variants of such a root scanning
                     algorithm.\par
                     During heap compaction, objects are copied. Copying is
                     usually performed atomically to avoid interference with
                     application threads, which could render the state of an
                     object inconsistent. Copying of large objects and
                     especially large arrays introduces long blocking times
                     that are unacceptable for real-time systems. In this
                     article, an interruptible copy unit is presented that
                     implements nonblocking object copy. The unit can be
                     interrupted after a single word move.\par
                     We evaluate a real-time garbage collector that uses the
                     proposed techniques on a Java processor. With this
                     garbage collector, it is possible to run high-priority
                     hard real-time tasks at 10 kHz parallel to the garbage
                     collection task on a 100 MHz system.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "6",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "Garbage collection; nonblocking copying; real-time;
                     root scanning",
}
@Article{Basanta-Val:2010:NHR,
  author          = "Pablo Basanta-Val and Marisol Garc{\'\i}a-Valls and
                     Iria Est{\'e}vez-Ayres",
  title           = "{No-Heap Remote Objects} for distributed real-time
                     {Java}",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "7:1--7:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814546",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "This article presents an approach to providing
                     real-time support for Java's Remote Method Invocation
                     (RMI) and its integration with the RTSJ memory model in
                     order to leave out garbage collection. A new construct
                     for remote objects, called {\em No-heap Remote
                     object\/} ({\em NhRo\/}), is introduced. The use of a
                     NhRo guarantees that memory required to perform a
                     remote invocation (at the server side) does not use
                     heap memory. Thus, the aim is to avoid garbage
                     collection in the remote invocation process, improving
                     predictability and memory isolation of distributed
                     Java-based real-time applications. The article presents
                     the bare model and the main programming patterns that
                     are associated with the NhRo model. Sun RMI
                     implementation has been modified to integrate the NhRo
                     model in both static and dynamic environments.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "7",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "distributed real-time Java; DRTSJ; Real-time Java;
                     real-time remote objects; region-based memory
                     management; RTSJ",
}
@Article{Curley:2010:RDT,
  author          = "Edward Curley and Binoy Ravindran and Jonathan
                     Anderson and E. Douglas Jensen",
  title           = "Recovering from distributable thread failures in
                     distributed real-time {Java}",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "8:1--8:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814547",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "We consider the problem of recovering from the
                     failures of distributable threads (``threads'') in
                     distributed real-time systems that operate under
                     runtime uncertainties including those on thread
                     execution times, thread arrivals, and node failure
                     occurrences. When a thread experiences a node failure,
                     the result is a broken thread having an orphan. Under a
                     termination model, the orphans must be detected and
                     aborted, and exceptions must be delivered to the
                     farthest, contiguous surviving thread segment for
                     resuming thread execution. Our application/scheduling
                     model includes the proposed distributable thread
                     programming model for the emerging Distributed
                     Real-Time Specification for Java (DRTSJ), together with
                     an exception-handler model. Threads are subject to
                     time/utility function (TUF) time constraints and an
                     utility accrual (UA) optimality criterion. A key
                     underpinning of the TUF/UA scheduling paradigm is the
                     notion of ``best-effort'' where higher importance
                     threads are always favored over lower importance ones,
                     irrespective of thread urgency as specified by their
                     time constraints. We present a thread scheduling
                     algorithm called HUA and a thread integrity protocol
                     called TPR. We show that HUA and TPR bound the orphan
                     cleanup and recovery time with bounded loss of the
                     best-effort property. Our implementation experience for
                     HUA/TPR in the Reference Implementation of the proposed
                     programming model for the DRTSJ demonstrates the
                     algorithm/protocol's effectiveness.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "8",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "distributable thread; Distributed; distributed
                     scheduling; Java; real-time; thread integrity",
}
@Article{Pitter:2010:RTJ,
  author          = "Christof Pitter and Martin Schoeberl",
  title           = "A real-time {Java} chip-multiprocessor",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "9:1--9:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814548",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Chip-multiprocessors are an emerging trend for
                     embedded systems. In this article, we introduce a
                     real-time Java multiprocessor called JopCMP. It is a
                     symmetric shared-memory multiprocessor, and consists of
                     up to eight Java Optimized Processor (JOP) cores, an
                     arbitration control device, and a shared memory. All
                     components are interconnected via a system on chip bus.
                     The arbiter synchronizes the access of multiple CPUs to
                     the shared main memory. In this article, three
                     different arbitration policies are presented,
                     evaluated, and compared with respect to their real-time
                     and average-case performance: a fixed priority, a
                     fair-based, and a time-sliced arbiter.\par
                     Tasks running on different CPUs of a
                     chip-multiprocessor (CMP) influence each others'
                     execution times when accessing a shared memory.
                     Therefore, the system needs an arbiter that is able to
                     limit the worst-case execution time of a task running
                     on a CPU, even though tasks executing simultaneously on
                     other CPUs access the main memory. Our research shows
                     that timing analysis is in fact possible for
                     homogeneous multiprocessor systems with a shared
                     memory. The timing analysis of tasks, executing on the
                     CMP using time-sliced memory arbitration, leads to
                     viable worst-case execution time bounds.\par
                     The time-sliced arbiter divides the memory access time
                     into equal time slots, one time slot for each CPU. This
                     memory arbitration scheme allows for a calculation of
                     upper bounds of Java application worst-case execution
                     times, depending on the number of CPUs, the time slot
                     size, and the memory access time. Examples of
                     worst-case execution time calculation are presented,
                     and the analyzed results of a real-world application
                     task are compared to measured execution time results.
                     Finally, we evaluate the tradeoffs when using a
                     time-predictable solution compared to using
                     average-case optimized chip-multiprocessors, applying
                     three different benchmarks. These experiments are
                     carried out by executing the programs on the CMP
                     prototype.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "9",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "Java processor; multiprocessor; Real-time system;
                     shared memory; worst-case execution time",
}
@Article{Kaiser:2010:ISI,
  author          = "William Kaiser and Majid Sarrafzadeh",
  title           = "Introduction to special issue on wireless health",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "10:1--10:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814549",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "10",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
%%% Ko:2010:MME -- the fourth author's name was stored in a mangled form
%%% (stray "v" and a tilde accent); it is restored here with the breve
%%% accent.  NOTE(review): confirm the spelling against the article's
%%% title page.
@Article{Ko:2010:MME,
  author          = "Jeonggil Ko and Jong Hyun Lim and Yin Chen and
                     R{\u{a}}zvan Mus{\u{a}}loiu-E. and Andreas Terzis and
                     Gerald M. Masson and Tia Gao and Walt Destler and Leo
                     Selavo and Richard P. Dutton",
  title           = "{MEDiSN}: {Medical} emergency detection in sensor
                     networks",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "11:1--11:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814550",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Staff shortages and an increasingly aging population
                     are straining the ability of emergency departments to
                     provide high quality care. At the same time, there is a
                     growing concern about hospitals' ability to provide
                     effective care during disaster events. For these
                     reasons, tools that automate patient monitoring have
                     the potential to greatly improve efficiency and quality
                     of health care. Towards this goal, we have developed
                     {\em MEDiSN}, a wireless sensor network for monitoring
                     patients' physiological data in hospitals and during
                     disaster events. MEDiSN comprises {\em Physiological
                     Monitors\/} (PMs), which are custom-built, patient-worn
                     motes that sample, encrypt, and sign physiological data
                     and {\em Relay Points\/} (RPs) that self-organize into
                     a multi-hop wireless backbone for carrying
                     physiological data. Moreover, MEDiSN includes a
                     back-end server that persistently stores medical data
                     and presents them to authenticated GUI clients. The
                     combination of MEDiSN's two-tier architecture and
                     optimized rate control protocols allows it to address
                     the compound challenge of reliably delivering large
                     volumes of data while meeting the application's QoS
                     requirements. Results from extensive simulations,
                     testbed experiments, and multiple pilot hospital
                     deployments show that MEDiSN can scale from tens to at
                     least five hundred PMs, effectively protect application
                     packets from congestive and corruptive losses, and
                     deliver medically actionable data.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "11",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "Medical sensor networks; wireless physiological
                     monitoring",
}
@Article{Coronato:2010:FSW,
  author          = "Antonio Coronato and Giuseppe {De Pietro}",
  title           = "Formal specification of wireless and pervasive
                     healthcare applications",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "12:1--12:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814551",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Wireless and pervasive healthcare applications
                     typically present critical requirements from the point
                     of view of functional correctness, reliability,
                     availability, security, and safety. In contrast to the
                     case of classic safety critical applications, the
                     behavior of wireless and pervasive applications is
                     affected by the movements and location of users and
                     resources.\par
                     This article presents a methodology to formally express
                     requirements in safety critical wireless and pervasive
                     healthcare applications in order to achieve a higher
                     degree of dependability. In particular, it will be
                     shown how it is possible to formalize and constrict
                     mobility characteristics by combining, and in some
                     cases extending, several formal methods. The article
                     also describes a rigorous specification process.
                     Finally, it concludes with a case study of a real
                     safety critical pervasive healthcare application that
                     is going to be deployed in a city hospital.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "12",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "Formal specification; methodologies and tools;
                     wireless and pervasive healthcare applications",
}
@Article{Waluyo:2010:MMB,
  author          = "Agustinus Borgy Waluyo and Wee-Soon Yeoh and Isaac Pek
                     and Yihan Yong and Xiang Chen",
  title           = "{MobiSense}: {Mobile} body sensor network for
                     ambulatory monitoring",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "13:1--13:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814552",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "This article introduces MobiSense, a novel mobile
                     health monitoring system for ambulatory patients.
                     MobiSense resides in a mobile device, communicates with
                     a set of body sensor devices attached to the wearer,
                     and processes data from these sensors. MobiSense is
                     able to detect body postures such as lying, sitting,
                     and standing, and walking speed, by utilizing our
                     rule-based heuristic activity classification scheme
                     based on the extended Kalman (EK) Filtering algorithm.
                     Furthermore, the proposed system is capable of
                     controlling each of the sensor devices, and performing
                     resource reconfiguration and management schemes (sensor
                     sleep/wake-up mode). The architecture of MobiSense is
                     highlighted and discussed in depth. The system has been
                     implemented, and its prototype is showcased. We have
                     also carried out rigorous performance measurements of
                     the system including real-time and query latency as
                     well as the power consumption of the sensor nodes. The
                     accuracy of our activity classifier scheme has been
                     evaluated by involving several human subjects, and we
                     found promising results.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "13",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "ambulatory patient monitoring; pervasive healthcare;
                     wireless body sensor network; Wireless health system",
}
@Article{Quwaider:2010:TPA,
  author          = "Muhannad Quwaider and Jayanthi Rao and Subir Biswas",
  title           = "Transmission power assignment with postural position
                     inference for on-body wireless communication links",
  journal         = j-TECS,
  volume          = "10",
  number          = "1",
  pages           = "14:1--14:??",
  month           = aug,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1814539.1814553",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 30 15:29:45 MDT 2010",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "This article presents a novel transmission power
                     assignment mechanism for on-body wireless links formed
                     between severely energy-constrained wearable and
                     implanted sensors. The key idea is to develop a
                     measurement-based framework in which the postural
                     position as it pertains to a given wireless link is
                     first inferred based on the measured RF signal strength
                     and packet drops. Then optimal power assignment is done
                     by fitting those measurement results into a model
                     describing the relationship between the assigned power
                     and the resulting signal strength. A closed loop power
                     control mechanism is then added for iterative
                     convergence to the optimal power level as a response to
                     both intra-and-inter posture body movements. This
                     provides a practical paradigm for on-body power
                     assignment, which cannot leverage the existing
                     mechanisms in the literature that rely on localization,
                     which is not realistic for on-body sensors. Extensive
                     experimental results are provided to demonstrate the
                     model building and algorithm performance on a prototype
                     body area network. The proposed mechanism has also been
                     compared with a number of other closed loop mechanisms
                     and an experimental benchmark.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "14",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
  keywords        = "adaptive power control; Body area network; link
                     quality measurement; radio link quality",
}
@Article{Basten:2010:EMD,
  author          = "Twan Basten and Rolf Ernst",
  title           = "Editorial: {Model-driven} embedded-system design",
  journal         = j-TECS,
  volume          = "10",
  number          = "2",
  pages           = "15:1--15:??",
  month           = dec,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1880050.1880051",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jan 10 09:44:12 MST 2011",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "15",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Geilen:2010:SDS,
  author          = "Marc Geilen",
  title           = "Synchronous dataflow scenarios",
  journal         = j-TECS,
  volume          = "10",
  number          = "2",
  pages           = "16:1--16:??",
  month           = dec,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1880050.1880052",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jan 10 09:44:12 MST 2011",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The Synchronous Dataflow (SDF) model of computation by
                     Lee and Messerschmitt has become popular for modeling
                     concurrent applications on a multiprocessor platform.
                     It is used to obtain a guaranteed, predictable
                     performance. The model, on the other hand, is quite
                     restrictive in its expressivity, making it less
                     applicable to many modern, more dynamic applications. A
                     common technique to deal with dynamic behavior is to
                     consider different scenarios in separation. This
                     analysis is, however, currently limited mainly to
                     sequential applications. In this article, we present a
                     new analysis approach that allows analysis of
                     synchronous dataflow models across different scenarios
                     of operation. The dataflow graphs corresponding to the
                     different scenarios can be completely different.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "16",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Wiggers:2010:BCC,
  author          = "Maarten H. Wiggers and Marco J. G. Bekooij and Gerard
                     J. M. Smit",
  title           = "Buffer capacity computation for throughput-constrained
                     modal task graphs",
  journal         = j-TECS,
  volume          = "10",
  number          = "2",
  pages           = "17:1--17:??",
  month           = dec,
  year            = "2010",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/1880050.1880053",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jan 10 09:44:12 MST 2011",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Increasingly, stream-processing applications include
                     complex control structures to better adapt to changing
                     conditions in their environment. This adaptivity often
                     results in task execution rates that are dependent on
                     the processed stream. Current approaches to compute
                     buffer capacities that are sufficient to satisfy a
                     throughput constraint have limited applicability in
                     case of data-dependent task execution rates. In this
                     article, we present a dataflow model that allows tasks
                     to have loops with an unbounded number of iterations.
                     For instances of this dataflow model, we present
                     efficient checks on their validity. Furthermore, we
                     present an efficient algorithm to compute buffer
                     capacities that are sufficient to satisfy a throughput
                     constraint.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "17",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Falk:2010:ASA,
author = "Joachim Falk and Christian Zebelein and Joachim
Keinert and Christian Haubelt and Juergen Teich and
Shuvra S. Bhattacharyya",
title = "Analysis of {SystemC} actor networks for efficient
synthesis",
journal = j-TECS,
volume = "10",
number = "2",
pages = "18:1--18:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880054",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Applications in the signal processing domain are often
modeled by dataflow graphs. Due to heterogeneous
complexity requirements, these graphs contain both
dynamic and static dataflow actors. In previous work,
we presented a generalized clustering approach for
these heterogeneous dataflow graphs in the presence of
unbounded buffers. This clustering approach allows the
application of static scheduling methodologies for
static parts of an application during embedded software
generation for multiprocessor systems. It
systematically exploits the predictability and
efficiency of the static dataflow model to obtain
latency and throughput improvements. In this article,
we present a generalization of this clustering
technique to dataflow graphs with bounded buffers,
therefore enabling synthesis for embedded systems
without dynamic memory allocation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Razavi:2010:SAB,
author = "Niloofar Razavi and Razieh Behjati and Hamideh Sabouri
and Ehsan Khamespanah and Amin Shali and Marjan
Sirjani",
title = "{Sysfier}: {Actor-based} formal verification of
{SystemC}",
journal = j-TECS,
volume = "10",
number = "2",
pages = "19:1--19:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880055",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "SystemC is a system-level modeling language that can
be used effectively for hardware/software co-design.
Since a major goal of SystemC is to enable verification
at higher levels of abstraction, the tendency is now
directing to introducing formal verification approaches
for SystemC. In this article, we propose an approach
for formal verification of SystemC designs, and provide
the semantics of SystemC using Labeled Transition
Systems (LTS) for this purpose. An actor-based
language, Rebeca, is used as an intermediate language.
SystemC designs are mapped to Rebeca models and then
Rebeca verification toolset is used to verify LTL and
CTL properties. To tackle the state-space explosion,
Rebeca model checkers offer some reduction policies
that make them appropriate for SystemC verification.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Adler:2010:CBM,
author = "Rasmus Adler and Ina Schaefer and Mario Trapp and Arnd
Poetzsch-Heffter",
title = "Component-based modeling and verification of dynamic
adaptation in safety-critical embedded systems",
journal = j-TECS,
volume = "10",
number = "2",
pages = "20:1--20:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880056",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Adaptation is increasingly used in the development of
safety-critical embedded systems, in particular to
reduce hardware needs and to increase availability.
However, composing a system from many reconfigurable
components can lead to a huge number of possible system
configurations, inducing a complexity that cannot be
handled during system design. To overcome this problem,
we propose a new component-based modeling and
verification method for adaptive embedded systems. The
component-based modeling approach facilitates
abstracting a composition of components to a
hierarchical component. In the hierarchical component,
the number of possible configurations of the
composition is reduced to a small number of
hierarchical configurations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Driver:2010:MES,
author = "Cormac Driver and Sean Reilly and {\'E}amonn Linehan
and Vinny Cahill and Siobh{\'a}n Clarke",
title = "Managing embedded systems complexity with
aspect-oriented model-driven engineering",
journal = j-TECS,
volume = "10",
number = "2",
pages = "21:1--21:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880057",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Model-driven engineering addresses issues of platform
heterogeneity and code quality through the use of
high-level system models and subsequent automatic
transformations. Adoption of the model-driven software
engineering paradigm for embedded systems necessitates
specification of appropriate models of often complex
systems. Modern embedded systems are typically composed
of multiple functional and nonfunctional concerns, with
the nonfunctional concerns (e.g., timing and
performance) typically affecting the design and
implementation of the functional concerns. The presence
of crosscutting concerns makes specification of
adequate platform-independent models a significant
challenge. Aspect-oriented software development is a
separation of concerns technique that decomposes
systems into distinct features with minimal overlap.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schliecker:2010:RTP,
author = "Simon Schliecker and Rolf Ernst",
title = "Real-time performance analysis of multiprocessor
systems with shared memory",
journal = j-TECS,
volume = "10",
number = "2",
pages = "22:1--22:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880058",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Predicting timing behavior is key to reliable
real-time system design and verification, but becomes
increasingly difficult for current multiprocessor
systems on chip. The integration of formerly separate
functionality into a single multicore system introduces
new intercore timing dependencies resulting from the
common use of the now shared resources. This feedback
of system timing on local timing makes traditional
performance analysis approaches inappropriate. This
article presents a general methodology to model the
shared resource traffic and consider its effect on the
local task execution. The aggregate busy time captures
the timing of multiple accesses to a shared memory far
better than the traditional models that focus on the
timing of individual events.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Seo:2010:DAS,
author = "Euiseong Seo and Sangwon Kim and Seonyeong Park and
Joonwon Lee",
title = "Dynamic alteration schemes of real-time schedules for
{I/O} device energy efficiency",
journal = j-TECS,
volume = "10",
number = "2",
pages = "23:1--23:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880059",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many I/O devices provide multiple power states known
as the dynamic power management (DPM) feature. However,
activating from sleep state requires significant
transition time and this obstructs utilizing DPM in
nonpreemptive real-time systems. This article suggests
nonpreemptive real-time task scheduling schemes
maximizing the effectiveness of the I/O device DPM
support. First, we introduce a runtime schedulability
check algorithm for nonpreemptive real-time systems
that can check whether a modification from a valid
schedule is still valid. By using this, we suggest
three heuristic algorithms. The first algorithm
reorders the execution sequence of tasks according to
the similarity of their required device sets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cabodi:2010:BSF,
author = "Gianpiero Cabodi and Marco Murciano and Massimo
Violante",
title = "Boosting software fault injection for dependability
analysis of real-time embedded applications",
journal = j-TECS,
volume = "10",
number = "2",
pages = "24:1--24:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880060",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The design of complex embedded systems deployed in
safety-critical or mission-critical applications
mandates the availability of methods to validate the
system dependability across the whole design flow. In
this article we introduce a fault injection approach,
based on loadable kernel modules and running under the
Linux operating system, which can be adopted as soon as
a running prototype of the systems is available.
Moreover, for the purpose of decoupling dependability
analysis from hardware availability, we also propose
the adoption of hardware virtualization.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mohan:2010:PTA,
author = "Sibin Mohan and Frank Mueller and Michael Root and
William Hawkins and Christopher Healy and David Whalley
and Emilio Vivancos",
title = "Parametric timing analysis and its application to
dynamic voltage scaling",
journal = j-TECS,
volume = "10",
number = "2",
pages = "25:1--25:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880061",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Embedded systems with real-time constraints depend on
a priori knowledge of worst-case execution times
(WCETs) to determine if tasks meet deadlines. Static
timing analysis derives bounds on WCETs but requires
statically known loop bounds. This work removes the
constraint on known loop bounds through parametric
analysis expressing WCETs as functions. Tighter WCETs
are dynamically discovered to exploit slack by dynamic
voltage scaling (DVS) saving 60\% to 82\% energy over
DVS-oblivious techniques and showing savings close to
more costly dynamic-priority DVS algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhu:2010:RAD,
author = "Dakai Zhu",
title = "Reliability-aware dynamic energy management in
dependable embedded real-time systems",
journal = j-TECS,
volume = "10",
number = "2",
pages = "26:1--26:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880062",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recent studies show that voltage scaling, which is an
efficient energy management technique, has a direct and
negative effect on system reliability because of the
increased rate of transient faults (e.g., those induced
by cosmic particles). In this article, we propose
energy management schemes that explicitly take system
reliability into consideration. The proposed
reliability-aware energy management schemes dynamically
schedule recoveries for tasks to be scaled down to
recuperate the reliability loss due to energy
management. Based on the amount of available slack, the
application size, and the fault rate changes, we
analyze when it is profitable to reclaim the slack for
energy savings without sacrificing system
reliability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ramaprasad:2010:TBF,
author = "Harini Ramaprasad and Frank Mueller",
title = "Tightening the bounds on feasible preemptions",
journal = j-TECS,
volume = "10",
number = "2",
pages = "27:1--27:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880063",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Data caches are an increasingly important
architectural feature in most modern computer systems.
They help bridge the gap between processor speeds and
memory access times. One inherent difficulty of using
data caches in a real-time system is the
unpredictability of memory accesses, which makes it
difficult to calculate worst-case execution times
(WCETs) of real-time tasks. While cache analysis for
single real-time tasks has been the focus of much
research in the past, bounding the preemption delay in
a multitask preemptive environment is a challenging
problem, particularly for data caches. This article
makes multiple contributions in the context of
independent, periodic tasks with deadlines less than or
equal to their periods executing on a single
processor.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2010:SMA,
author = "Lian Li and Jingling Xue and Jens Knoop",
title = "Scratchpad memory allocation for data aggregates via
interval coloring in superperfect graphs",
journal = j-TECS,
volume = "10",
number = "2",
pages = "28:1--28:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880064",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Existing methods place data or code in scratchpad
memory (SPM) by relying on heuristics or resorting to
integer programming or mapping it to a graph-coloring
problem. In this article, the SPM allocation problem
for arrays is formulated as an interval coloring
problem. The key observation is that in many embedded C
programs, two arrays can be modeled such that either
their live ranges do not interfere or one contains the
other (with good accuracy). As a result, array
interference graphs often form a special class of
superperfect graphs (known as comparability graphs),
and their optimal interval colorings become efficiently
solvable. This insight has led to the development of an
SPM allocation algorithm that places arrays in an
interference graph in SPM by examining its maximal
cliques.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Singh:2010:CPD,
author = "Montek Singh and Steven M. Nowick",
title = "Call for papers: {Deadline: March 15, 2011}",
journal = j-TECS,
volume = "10",
number = "2",
pages = "29:1--29:??",
month = dec,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1880050.1880065",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jan 10 09:44:12 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "29",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{West:2011:ASS,
author = "Richard West and Gabriel Parmer",
title = "Application-specific service technologies for
commodity operating systems in real-time environments",
journal = j-TECS,
volume = "10",
number = "3",
pages = "30:1--30:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952523",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In order to eliminate the costs of proprietary systems
and special purpose hardware, many real-time and
embedded computing platforms are being built on
commodity operating systems and generic hardware.
Unfortunately, many such systems are ill-suited to the
low-latency and predictable timing requirements of
real-time applications. This article, therefore,
focuses on application-specific service technologies
for low-cost commodity operating systems and hardware,
so that real-time service guarantees can be met. We
describe contrasting methods to deploy first-class
services on commodity systems that are dispatched with
low latency and execute asynchronously according to
bounds on CPU, memory, and I/O device usage.
Specifically, we present a ``user-level sandboxing''
(ULS) mechanism that relies on hardware protection to
isolate application-specific services from the core
kernel.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2011:NBF,
author = "Xue Liu and Tarek Abdelzaher",
title = "Nonutilization bounds and feasible regions for
arbitrary fixed-priority policies",
journal = j-TECS,
volume = "10",
number = "3",
pages = "31:1--31:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952524",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Prior research on schedulability bounds focused
primarily on bounding utilization as a means to meet
deadline constraints. Nontrivial bounds were found for
a handful of scheduling policies in which utilization
is directly related to the ability of the policy to
meet deadlines. Examples include rate-monotonic,
deadline-monotonic, and EDF scheduling. For most other
scheduling policies, however, utilization is not
correlated with schedulability. For example,
shortest-job-first can miss deadlines at an arbitrarily
low utilization. This raises the question of whether or
not some other nonutilization-based metric might be
more indicative of schedulability in those cases. This
article answers the above question positively by
extending the notion of schedulability bounds, in a
uniform manner, to arbitrary (fixed) priorities and
nonutilization metrics.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "31",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nair:2011:EHB,
author = "Ajay Nair and Karthik Shankar and Roman Lysecky",
title = "Efficient hardware-based nonintrusive dynamic
application profiling",
journal = j-TECS,
volume = "10",
number = "3",
pages = "32:1--32:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952525",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Application profiling---the process of monitoring an
application to determine the frequency of execution
within specific regions---is an essential step within
the design process for many software and hardware
systems. Profiling is often a critical step within
hardware/software partitioning utilized to determine
the critical kernels of an application. In this
article, we present an innovative, nonintrusive dynamic
application profiler (DAProf) capable of profiling an
executing application by monitoring the application's
short backward branches, function calls, and function
returns. The resulting profile information provides an
accurate characterization of the frequently executed
loops within the application providing a breakdown of
loop executions versus loop iterations per execution.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "32",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Aaraj:2011:FDE,
author = "Najwa Aaraj and Anand Raghunathan and Niraj K. Jha",
title = "A framework for defending embedded systems against
software attacks",
journal = j-TECS,
volume = "10",
number = "3",
pages = "33:1--33:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952526",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The incidence of malicious code and software
vulnerability exploits on embedded platforms is
constantly on the rise. Yet, little effort is being
devoted to combating such threats to embedded systems.
Moreover, adapting security approaches designed for
general-purpose systems generally fails because of the
limited processing capabilities of their embedded
counterparts. In this work, we evaluate a malware and
software vulnerability exploit defense framework for
embedded systems. The proposed framework extends our
prior work, which defines two isolated execution
environments: a testing environment, wherein an
untrusted application is first tested using dynamic
binary instrumentation (DBI), and a real environment,
wherein a program is monitored at runtime using an
extracted behavioral model, along with a continuous
learning process.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Berendsen:2011:FSA,
author = "Jasper Berendsen and Biniam Gebremichael and Frits W.
Vaandrager and Miaomiao Zhang",
title = "Formal specification and analysis of {Zeroconf} using
{Uppaal}",
journal = j-TECS,
volume = "10",
number = "3",
pages = "34:1--34:32",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952527",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The model checker Uppaal is used to formally model and
analyze parts of Zeroconf, a protocol for dynamic
configuration of IPv4 link-local addresses that has
been defined in RFC 3927 of the IETF. Our goal has been
to construct a model that (a) is easy to understand by
engineers, (b) comes as close as possible to the
informal text (for each transition in the model there
should be a corresponding piece of text in the RFC),
and (c) may serve as a basis for formal verification.
Our modeling efforts revealed several errors (or at
least ambiguities) in the RFC that no one else spotted
before.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ykman-Couvreur:2011:FMM,
author = "Ch. Ykman-Couvreur and V. Nollet and F. Catthoor and
H. Corporaal",
title = "Fast multidimension multichoice knapsack heuristic for
{MP-SoC} runtime management",
journal = j-TECS,
volume = "10",
number = "3",
pages = "35:1--35:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952528",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Since the application complexity is growing and
applications can be dynamically activated, the major
challenge for heterogeneous multiprocessor platforms is
to select at runtime an energy-efficient mapping of
these applications. Taking into account that many
different possible implementations per application can
be available, and that the selection must meet the
application deadlines under the available platform
resources, this runtime optimization problem can be
modeled as a Multidimension Multichoice Knapsack
Problem (MMKP), which is known to be NP-hard. Not only
algorithms for an optimal solution, but also
state-of-the-art heuristics for real-time systems are
still too slow for runtime management of multiprocessor
platforms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "35",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ragel:2011:HHS,
author = "Roshan G. Ragel and Sri Parameswaran",
title = "A hybrid hardware--software technique to improve
reliability in embedded processors",
journal = j-TECS,
volume = "10",
number = "3",
pages = "36:1--36:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952529",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Numerous methods have been described in research
literature with methods to improve reliability of
processors by the use of control-flow checking. High
performance and code-size penalties cripple the
proposed software approaches, while hardware approaches
are not scalable and are thus rarely implemented in
real embedded systems. In this article, we show that by
including control-flow checking as an issue to be
considered when designing an embedded processor, we are
able to reduce overheads considerably and still provide
a scalable solution to this problem. The technique
described in this article includes architectural
improvements to the processor and binary rewriting of
the application.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huynh:2011:EAR,
author = "Johnny Huynh and Jos{\'e} Nelson Amaral and Paul
Berube and Sid-Ahmed-Ali Touati",
title = "Evaluating address register assignment and offset
assignment algorithms",
journal = j-TECS,
volume = "10",
number = "3",
pages = "37:1--37:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952530",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In digital signal processors (DSPs), variables are
accessed using $k$ address registers. The problem of
finding a memory layout, for a set of variables, that
minimizes the address-computation overhead is known as
the General Offset Assignment (GOA) problem. The most
common approach to this problem is to partition the set
of variables into $k$ partitions and to assign each
partition to an address register. Thus, effectively
decomposing the GOA problem into several Simple Offset
Assignment (SOA) problems. Many heuristic-based
algorithms are proposed in the literature to
approximate solutions to both the variable partitioning
and the SOA problems. However, the address-computation
overhead of the resulting memory layouts are not
accurately evaluated.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Diguet:2011:CLB,
author = "Jean-Philippe Diguet and Yvan Eustache and Guy
Gogniat",
title = "Closed-loop--based self-adaptive {Hardware\slash
Software-Embedded} systems: Design methodology and
smart {CAM} case study",
journal = j-TECS,
volume = "10",
number = "3",
pages = "38:1--38:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952522.1952531",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon May 2 10:07:27 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents our methodology for implementing
self-adaptiveness within an OS-based and reconfigurable
embedded system according to objectives such as quality
of service, performance, or power consumption. We
detail our approach to separate application-specific
decisions and hardware\slash software-implementation
decisions at system level. The former are related to
the efficiency control of applications and based on the
knowledge of application engineers. The latter are
generic and address the choice between various hardware
and software implementations according to user
objectives. The decision management is implemented as
an adaptive closed-loop model. We describe how each
design step may be implemented and especially how we
solved the issue of stability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gamatie:2011:MDD,
author = "Abdoulaye Gamati{\'e} and S{\'e}bastien {Le Beux} and
{\'E}ric Piel and Rabie {Ben Atitallah} and Anne Etien
and Philippe Marquet and Jean-Luc Dekeyser",
title = "A Model-Driven Design Framework for Massively Parallel
Embedded Systems",
journal = j-TECS,
volume = "10",
number = "4",
pages = "39:1--39:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043663",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modern embedded systems integrate more and more
complex functionalities. At the same time, the
semiconductor technology advances enable to increase
the amount of hardware resources on a chip for the
execution. Massively parallel embedded systems
specifically deal with the optimized usage of such
hardware resources to efficiently execute their
functionalities. The design of these systems mainly
relies on the following challenging issues: first, how
to deal with the parallelism in order to increase the
performance; second, how to abstract their
implementation details in order to manage their
complexity; third, how to refine these abstract
representations in order to produce efficient
implementations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2011:DPT,
author = "Seungkyun Kim and Kiwon Kwon and Chihun Kim and
Choonki Jang and Jaejin Lee and Sang Lyul Min",
title = "Demand Paging Techniques for Flash Memory Using
Compiler Post-Pass Optimizations",
journal = j-TECS,
volume = "10",
number = "4",
pages = "40:1--40:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043664",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we propose an application-specific
demand paging mechanism for low-end embedded systems
that have flash memory as secondary storage. These
systems are not equipped with virtual memory. A small
memory space called an execution buffer is used to page
the code of an application. An application-specific
page manager manages the buffer. The page manager is
automatically generated by a compiler post-pass
optimizer and combined with the application image. The
post-pass optimizer analyzes the executable image and
transforms function call/return instructions into calls
to the page manager. As a result, each function in the
code can be loaded into the memory on demand at
runtime.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dini:2011:LLA,
author = "Gianluca Dini and Ida M. Savino",
title = "{LARK}: a Lightweight Authenticated {ReKeying} Scheme
for Clustered Wireless Sensor Networks",
journal = j-TECS,
volume = "10",
number = "4",
pages = "41:1--41:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043665",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Group communication has proven a powerful paradigm for
designing applications and services in Wireless Sensor
Networks (WSNs). Given the tight interaction between
WSNs and the physical world, a security infringement
may translate into a safety infringement. Therefore, in
order to fully exploit the group communication paradigm
we need to secure it. Traditionally, this requirement
has been formalized in terms of backward and forward
security and fulfilled by means of rekeying. In WSNs,
group rekeying becomes particularly a complex problem
because communication takes place over an easily
accessible wireless medium and because sensor nodes
have severe limitations in terms of computing, storage,
energy, and tamper-resistance capabilities for cost
reasons.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schoeberl:2011:HAL,
author = "Martin Schoeberl and Stephan Korsholm and Tomas
Kalibera and Anders P. Ravn",
title = "A Hardware Abstraction Layer in {Java}",
journal = j-TECS,
volume = "10",
number = "4",
pages = "42:1--42:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043666",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Embedded systems use specialized hardware devices to
interact with their environment, and since they have to
be dependable, it is attractive to use a modern,
type-safe programming language like Java to develop
programs for them. Standard Java, as a
platform-independent language, delegates access to
devices, direct memory access, and interrupt handling
to some underlying operating system or kernel, but in
the embedded systems domain resources are scarce and a
Java Virtual Machine (JVM) without an underlying
middleware is an attractive architecture. The
contribution of this article is a proposal for Java
packages with hardware objects and interrupt handlers
that interface to such a JVM.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gilroy:2011:RHA,
author = "Michael Gilroy and James Irvine and Robert Atkinson",
title = "{RAID 6} Hardware Acceleration",
journal = j-TECS,
volume = "10",
number = "4",
pages = "43:1--43:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043667",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Inexpensive, reliable hard disk storage is
increasingly required in both businesses and the home.
As disk capacities increase and multiple drives are
combined in one system the probability of multiple disk
failures increases. Through the adoption of RAID 6 the
capability to recover from up to two simultaneous disk
failures becomes available. In this article, we present
three different RAID 6 implementations each tailored to
support different target applications and optimized to
reduce overall hardware resource utilization. We
present an optimal Reed-Solomon-based RAID 6
implementation for arrays of four disks. We also
present the smallest in terms of hardware resource
utilization as well as having the highest throughput RAID
6 hardware solution for disk arrays of up to 15
drives.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhuang:2011:CST,
author = "Xiaotong Zhuang and Santosh Pande",
title = "Compiler-Supported Thread Management for Multithreaded
Network Processors",
journal = j-TECS,
volume = "10",
number = "4",
pages = "44:1--44:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043668",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Traditionally, runtime management involving CPU
sharing, real-time scheduling, etc., is provided by the
runtime environment (typically an operating system)
using hardware support such as timers and interrupts.
However, due to stringent performance requirements on
network processors, neither OS nor hardware mechanisms
are typically feasible/available. Mapping packet
processing tasks on network processors involves complex
trade-offs to maximize parallelism and pipelining. Due
to an increase in the size of the code store and
complexity of application requirements, network
processors are being programmed with heterogeneous
threads that may execute code belonging to different
tasks on a given micro-engine. Also, most network
applications are streaming applications that are
typically processed in a pipelined fashion.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Stuart:2011:RRN,
author = "Matthias Bo Stuart and Mikkel Bystrup Stensgaard and
Jens Spars{\o}",
title = "The {ReNoC} Reconfigurable {Network-on-Chip}:
Architecture, Configuration Algorithms, and
Evaluation",
journal = j-TECS,
volume = "10",
number = "4",
pages = "45:1--45:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043669",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a reconfigurable network-on-chip
architecture called ReNoC, which is intended for use in
general-purpose multiprocessor system-on-chip
platforms, and which enables application-specific
logical NoC topologies to be configured, thus providing
both efficiency and flexibility. The article presents
three novel algorithms that synthesize an
application-specific NoC topology, map it onto the
physical ReNoC architecture, and create deadlock-free,
application-specific routing algorithms. We apply our
algorithms to a mixture of real and synthetic
applications and target three different physical
architectures. Compared to a conventional NoC, ReNoC
reduces power consumption by up to 58\% on average.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cucinotta:2011:RMA,
author = "Tommaso Cucinotta and Luca Abeni and Luigi Palopoli
and Giuseppe Lipari",
title = "A Robust Mechanism for Adaptive Scheduling of
Multimedia Applications",
journal = j-TECS,
volume = "10",
number = "4",
pages = "46:1--46:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043670",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We propose an adaptive scheduling technique to
schedule highly dynamic multimedia tasks on a CPU. We
use a combination of two techniques: the first one is a
feedback mechanism to track the resource requirements
of the tasks based on ``local'' observations. The
second one is a mechanism that operates with a
``global'' visibility, reclaiming unused bandwidth. The
combination proves very effective: resource reclaiming
increases the robustness of the feedback, while the
identification of the correct bandwidth made by the
feedback increases the effectiveness of the
reclamation. We offer both theoretical results and an
extensive experimental validation of the approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Touati:2011:ESR,
author = "Sid-Ahmed-Ali Touati and Frederic Brault and Karine
Deschinkel and Beno{\^\i}t {Dupont de Dinechin}",
title = "Efficient Spilling Reduction for Software Pipelined
Loops in Presence of Multiple Register Types in
Embedded {VLIW} Processors",
journal = j-TECS,
volume = "10",
number = "4",
pages = "47:1--47:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043671",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Integrating register allocation and software
pipelining of loops is an active research area. We
focus on techniques that precondition the dependence
graph before software pipelining in order to ensure
that no register spill instructions are inserted by the
register allocator in the software pipelined loop. If
spilling is not necessary for the input code,
preconditioning techniques insert dependence arcs so
that the maximum register pressure MAXLIVE achieved by
any loop schedule is below the number of available
registers, without hurting the initiation interval if
possible. When a solution exists, a spill-free software
pipeline is guaranteed to exist. Existing
preconditioning techniques consider one register type
(register class) at a time [Deschinkel and Touati
2008].",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "47",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhou:2011:ARA,
author = "Gang Zhou and Qiang Li and Jingyuan Li and Yafeng Wu
and Shan Lin and Jian Lu and Chieh-Yih Wan and Mark D.
Yarvis and John A. Stankovic",
title = "Adaptive and Radio-Agnostic {QoS} for Body Sensor
Networks",
journal = j-TECS,
volume = "10",
number = "4",
pages = "48:1--48:??",
month = nov,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2043662.2043672",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 19 15:49:06 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "As wireless devices and sensors are increasingly
deployed on people, researchers have begun to focus on
wireless body-area networks. Applications of wireless
body sensor networks include healthcare, entertainment,
and personal assistance, in which sensors collect
physiological and activity data from people and their
environments. In these body sensor networks, quality of
service is needed to provide reliable data
communication over prioritized data streams. This
article proposes BodyQoS, the first running QoS system
demonstrated on an emulated body sensor network.
BodyQoS adopts an asymmetric architecture, in which
most processing is done on a resource-rich aggregator,
minimizing the load on resource-limited sensor nodes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wandeler:2012:UGS,
author = "Ernesto Wandeler and Alexander Maxiaguine and Lothar
Thiele",
title = "On the use of greedy shapers in real-time embedded
systems",
journal = j-TECS,
volume = "11",
number = "1",
pages = "1:1--1:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146418",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Traffic shaping is a well-known technique in the area
of networking and is proven to reduce global buffer
requirements and end-to-end delays in networked
systems. Due to these properties, shapers also play an
increasingly important role in the design of
multiprocessor embedded systems that exhibit a
considerable amount of on-chip traffic. Despite the
growing importance of traffic shaping in this area, no
methods exist for analyzing shapers in distributed
embedded systems and for incorporating them into a
system-level performance analysis. Until now it was not
possible to determine the effect of shapers on
end-to-end delay guarantees or buffer requirements in
such systems. In this work, we present a method for
analyzing greedy shapers, and we embed this analysis
method into a well-established modular performance
analysis framework for real-time embedded systems. The
presented approach enables system-level performance
analysis of complete systems with greedy shapers, and
we prove its applicability by analyzing three case
study systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hamers:2012:EMS,
author = "Juan Hamers and Lieven Eeckhout",
title = "Exploiting media stream similarity for
energy-efficient decoding and resource prediction",
journal = j-TECS,
volume = "11",
number = "1",
pages = "2:1--2:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146419",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article introduces a novel approach to
energy-efficient media stream decoding that is based on
the notion of media stream similarity. The key idea is
that platform-independent scenarios with similar
decoding complexity can be identified within and across
media streams. A device that decodes a media stream
annotated with scenario information can then adjust its
processor clock frequency and voltage level based on
these scenarios for lower energy consumption. Our
evaluation, done using the H.264 AVC decoder and 12
reference video streams, shows an average energy
reduction of 44\% while missing less than 0.2\% of the
frame deadlines using scenario-driven video decoding.
An additional application of scenario-based media
stream annotation is to predict required resources
(compute power and energy) for consuming a given
service on a given device. Resource prediction is
extremely useful in a client-server setup in which the
client requests a media service from the server or
content provider. The content provider (in cooperation
with the client) can then determine what service
quality to deliver, given the client's available
resources. Scenario-aware resource prediction can
predict (compute power and energy) consumption with
errors less than 4\% (and an overall average 1.4\%
error).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhong:2012:WSN,
author = "Ziguo Zhong and Tian He",
title = "Wireless sensor node localization by multisequence
processing",
journal = j-TECS,
volume = "11",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146420",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Wireless Sensor Networks have been proposed for use in
many location-dependent applications. Most of these
need to identify the locations of sensor nodes, a
challenging task because of severe constraints on cost,
energy and effective range of sensor devices. To
overcome limitations in existing solutions, we present
a Multi-Sequence Positioning (MSP) method for
large-scale stationary sensor node localization in
outdoor environments. The novel idea behind MSP is to
reconstruct and estimate two-dimensional location
information for each sensor node by processing multiple
one-dimensional node sequences, easily obtained through
loosely guided event distribution. Starting from a
basic MSP design, we propose four optimizations that
work together to increase localization accuracy. We
address several interesting issues such as incomplete
(partial) node sequences and sequence flip, found in
the Mirage test-bed we built. We have evaluated the MSP
system through theoretical analysis, extensive
simulation as well as two physical systems (an indoor
version with 46 MICAz motes and an outdoor version with
20 MICAz motes). Evaluation demonstrates that MSP can
achieve an accuracy within one foot, requiring neither
additional costly hardware on sensor nodes nor precise
event distribution. In fact, it provides a nice
tradeoff between physical cost (anchors) and soft cost
(events) while maintaining localization accuracy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Peng:2012:BHA,
author = "Chunyi Peng and Guobin Shen and Yongguang Zhang",
title = "{BeepBeep}: a high-accuracy acoustic-based system for
ranging and localization using {COTS} devices",
journal = j-TECS,
volume = "11",
number = "1",
pages = "4:1--4:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146421",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present the design and implementation of BeepBeep,
a high-accuracy acoustic-based system for ranging and
localization. It is a pure software-based solution and
uses the most basic set of commodity hardware --- a
speaker, a microphone, and some form of interdevice
communication. The ranging scheme works without any
infrastructure and is applicable to sensor platforms
and commercial-off-the-shelf mobile devices. It
achieves high accuracy through three techniques:
two-way sensing, self-recording, and sample counting.
We further devise a scalable and fast localization
scheme. Our experiments show that up to one-centimeter
ranging accuracy and three-centimeter localization
accuracy can be achieved.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kumar:2012:CMA,
author = "T. S. Rajesh Kumar and R. Govindarajan and C. P.
Ravikumar",
title = "On-chip memory architecture exploration framework for
{DSP} processor-based embedded system on chip",
journal = j-TECS,
volume = "11",
number = "1",
pages = "5:1--5:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146422",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Today's SoCs are complex designs with multiple
embedded processors, memory subsystems, and application
specific peripherals. The memory architecture of
embedded SoCs strongly influences the power and
performance of the entire system. Further, the memory
subsystem constitutes a major part (typically up to
70\%) of the silicon area for the current day SoC. In
this article, we address the on-chip memory
architecture exploration for DSP processors which are
organized as multiple memory banks, where banks can be
single/dual ported with non-uniform bank sizes. In this
paper we propose two different methods for physical
memory architecture exploration and identify the
strengths and applicability of these methods in a
systematic way. Both methods address the memory
architecture exploration for a given target application
by considering the application's data access
characteristics and generates a set of Pareto-optimal
design points that are interesting from a power,
performance and VLSI area perspective. To the best of
our knowledge, this is the first comprehensive work on
memory space exploration at physical memory level that
integrates data layout and memory exploration to
address the system objectives from both hardware design
and application software development perspective.
Further we propose an automatic framework that explores
the design space identifying 100's of Pareto-optimal
design points within a few hours of running on a
standard desktop configuration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pande:2012:PDP,
author = "Amit Pande and Joseph Zambreno",
title = "{Poly-DWT}: {Polymorphic} wavelet hardware support for
dynamic image compression",
journal = j-TECS,
volume = "11",
number = "1",
pages = "6:1--6:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146423",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many modern computing applications have been enabled
through the use of real-time multimedia processing.
While several hardware architectures have been proposed
in the research literature to support such primitives,
these fail to address applications whose performance
and resource requirements have a dynamic aspect.
Embedded multimedia systems typically need a power and
computation efficient design in addition to good
compression performance. In this article, we introduce
a Polymorphic Wavelet Architecture (Poly-DWT) as a
crucial building block towards the development of
embedded systems to address such challenges. We
illustrate how our Poly-DWT architecture can
potentially make dynamic resource allocation decisions,
such as the internal bit representation and the
processing kernel, according to the application
requirements. We introduce a filter switching
architecture that allows for dynamic switching between
5/3 and 9/7 wavelet filters and leads to a more power
efficient design. Further, a multiplier-free design
with a low adder requirement demonstrates the potential
of Poly-DWT for embedded systems. Through an FPGA
prototype, we perform a quantitative analysis of our
Poly-DWT architecture, and compare our filter to
existing approaches to illustrate the area and
performance benefits inherent in our approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Seo:2012:RGV,
author = "Suk-Hyun Seo and Jin-Ho Kim and Sung-Ho Hwang and Key
Ho Kwon and Jae Wook Jeon",
title = "A reliable gateway for in-vehicle networks based on
{LIN}, {CAN}, and {FlexRay}",
journal = j-TECS,
volume = "11",
number = "1",
pages = "7:1--7:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146424",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article describes a reliable gateway for
in-vehicle networks. Such networks include local
interconnect networks, controller area networks, and
FlexRay. There is some latency when transferring a
message from one node (source) to another node
(destination). A high probability of error exists due
to different protocol specifications such as baud-rate,
and message frame format. Therefore, deploying a
reliable gateway is a challenge to the automotive
industry. We propose a reliable gateway based on the
OSEK/VDX components for in-vehicle networks. We also
examine the gateway system developed, and then we
evaluate the performance of our proposed system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2012:EFP,
author = "Kai Huang and Wolfgang Haid and Iuliana Bacivarov and
Matthias Keller and Lothar Thiele",
title = "Embedding formal performance analysis into the design
cycle of {MPSoCs} for real-time streaming
applications",
journal = j-TECS,
volume = "11",
number = "1",
pages = "8:1--8:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146425",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modern real-time streaming applications are
increasingly implemented on multiprocessor
systems-on-chip (MPSoC). The implementation, as well as
the verification of real-time applications executing on
MPSoCs, are difficult tasks, however. A major challenge
is the performance analysis of MPSoCs, which is
required for early design space exploration and final
system verification. Simulation-based methods are not
well-suited for this purpose, due to long runtimes and
non-exhaustive corner-case coverage. To overcome these
limitations, formal performance analysis methods that
provide guarantees for meeting real-time constraints
have been developed. Embedding formal performance
analysis into the MPSoC design cycle requires the
generation of a faithful analysis model and its
calibration with the system-specific parameters. In
this article, a design flow that automates these steps
is presented. In particular, we integrate modular
performance analysis (MPA) into the distributed
operation layer (DOL) MPSoC programming environment.
The result is an MPSoC software design flow that allows
for automatically generating the system implementation,
together with an analysis model for system
verification.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2012:AFS,
author = "Yuan-Hao Chang and Po-Liang Wu and Tei-Wei Kuo and
Shih-Hao Hung",
title = "An adaptive file-system-oriented {FTL} mechanism for
flash-memory storage systems",
journal = j-TECS,
volume = "11",
number = "1",
pages = "9:1--9:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146426",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "As flash memory becomes popular over various
platforms, there is a strong demand regarding the
performance degradation problem, due to the special
characteristics of flash memory. This research proposes
the design of a file-system-oriented flash translation
layer, in which a filter mechanism is designed to
separate the access requests of file-system metadata
and file contents for better performance. A recovery
scheme is then proposed for maintaining the integrity
of a file system. The proposed flash translation layer
is implemented as a Linux device driver and evaluated
with respect to ext2 and ext3 file systems. Experiments
were also done over NTFS by a series of realistic
traces. The experimental results show significant
performance improvement over ext2, ext3, and NTFS file
systems with limited system overheads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2012:SRS,
author = "Chunxiao Li and Niraj K. Jha and Anand Raghunathan",
title = "Secure reconfiguration of software-defined radio",
journal = j-TECS,
volume = "11",
number = "1",
pages = "10:1--10:??",
month = mar,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2146417.2146427",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Apr 2 17:42:24 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Software-defined radio (SDR) implements a radio system
in software that executes on a programmable processor.
The components of SDR, such as the filters, amplifiers,
and modulators, can be easily reconfigured to adapt to
the operating environment and user preferences.
However, the flexibility of radio reconfiguration
brings along the serious security concern of malicious
modification of software in the SDR system, leading to
radio malfunction and interference with other users'
communications. Both the SDR device and the network
need to be protected from such malicious radio
reconfiguration. In this article, a new architecture
targeted at protecting SDR devices from malicious
reconfiguration is proposed. The architecture is based
on robust separation of the radio operation environment
and user application environment, through the use of
virtualization. A new radio middleware layer is
designed to securely intercept all attempts to
reconfigure the radio, and a security policy monitor
checks the target configuration against security
policies that represent the interests of various
parties. Even if the operating system in the user
application environment is compromised, the proposed
architecture can ensure secure reconfiguration in the
radio operation environment. We have prototyped the
proposed secure SDR architecture using VMware and the
GNU Radio toolkit and demonstrate that overheads
incurred by the architecture are small and tolerable.
Therefore, we believe that the proposed solution could
be applied to address secure SDR reconfiguration in
both general-purpose and embedded computing systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "10",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Berekovic:2012:ISS,
  author = {Mladen Berekovic and Samarjit Chakraborty and Petru
    Eles and Andy D. Pimentel},
  title = {Introduction to the {Special Section on
    ESTIMedia'08}},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {11:1--11:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180891},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {11},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Zhu:2012:PAR,
  author = {Jun Zhu and Ingo Sander and Axel Jantsch},
  title = {Performance Analysis of Reconfigurations in Adaptive
    Real-Time Streaming Applications},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {12:1--12:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180888},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We propose a performance analysis framework for
    adaptive real-time synchronous data flow streaming
    applications on runtime reconfigurable FPGAs. As the
    main contribution, we present a constraint based
    approach to capture both streaming application
    execution semantics and the varying design concerns
    during reconfigurations. With our event models
    constructed as cumulative functions on data streams, we
    exploit a novel compile-time analysis framework based
    on iterative timing phases. Finally, we implement our
    framework on a public domain constraint solver, and
    illustrate its capabilities in the analysis of design
    trade-offs due to reconfigurations with experiments.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {12},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Hsieh:2012:PBP,
  author = {Kun-Yuan Hsieh and Chi-Hua Lai and Shang-Hong Lai and
    Jenq Kuen Lee},
  title = {Parallelization of Belief Propagation on {Cell}
    Processors for Stereo Vision},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {13:1--13:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180889},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Markov random field models provide a robust
    formulation for the stereo vision problem of inferring
    three-dimensional scene geometry from two images taken
    from different viewpoints. One of the most advanced
    algorithms for solving the associated energy
    minimization problem in the formulation is belief
    propagation (BP). Although BP provides very accurate
    results in solving stereo vision problems, the high
    computational cost of the algorithm hinders it from
    real-time applications. In recent years, multicore
    architectures have been widely adopted in various
    industrial application domains. The high computing
    power of multicore processors provides new
    opportunities to implement stereo vision algorithms.
    This article examines and extracts the parallelisms in
    the BP method for stereo vision on multicore
    processors. This article shows that parallelism of the
    algorithm can be efficiently utilized on multicore
    processors. The results show that parallelization on
    multicore processors provides a speedup for the BP
    algorithm of almost 15 times compared to the
    single-processor implementation on the PPE of the Cell
    BE. The experimental results also indicate that a frame
    rate of 6.5 frames/second is possible when implementing
    the parallelized BP algorithm on the multicore
    processor of Cell BE with one PPE and six SPEs.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {13},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Terechko:2012:BPS,
  author = {Andrei Terechko and Jan Hoogerbrugge and Ghiath Alkadi
    and Surendra Guntur and Anirban Lahiri and Marc
    Duranton and Clemens W{\"u}st and Phillip Christie and
    Axel Nackaerts and Aatish Kumar},
  title = {Balancing Programmability and Silicon Efficiency of
    Heterogeneous Multicore Architectures},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {14:1--14:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180890},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Multicore architectures provide scalable performance
    with a lower hardware design effort than single core
    processors. Our article presents a design methodology
    and an embedded multicore architecture, focusing on
    reducing the software design complexity and boosting
    the performance density. First, we analyze
    characteristics of the Task-Level Parallelism in modern
    multimedia workloads. These characteristics are used to
    formulate requirements for the programming model. Then
    we translate the programming model requirements to an
    architecture specification, including a novel
    low-complexity implementation of cache coherence and a
    hardware synchronization unit. Our evaluation
    demonstrates that the novel coherence mechanism
    substantially simplifies hardware design, while
    reducing the performance by less than 18\% relative to
    a complex snooping technique. Compared to a single
    processor core, the multicores have already proven to
    be more area- and energy-efficient. However, the
    multicore architectures in embedded systems still
    compete with highly efficient function-specific
    hardware accelerators. In this article we identify five
    architectural methods to boost performance density of
    multicores; microarchitectural downscaling, asymmetric
    multicore architectures, multithreading, generic
    accelerators, and conjoining. Then, we present a novel
    methodology to explore multicore design spaces,
    including the architectural methods improving the
    performance density. The methodology is based on a
    complex formula computing performances of heterogeneous
    multicore systems. Using this design space exploration
    methodology for HD and QuadHD H.264 video decoding, we
    estimate that the required areas of multicores in CMOS
    45 nm are 2.5 mm$^2$ and 8.6 mm$^2$, respectively.
    These results suggest that heterogeneous multicores are
    cost-effective for embedded applications and can
    provide a good programmability support.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {14},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Khajeh:2012:EAA,
  author = {Amin Khajeh and Minyoung Kim and Nikil Dutt and Ahmed
    M. Eltawil and Fadi J. Kurdahi},
  title = {Error-Aware Algorithm\slash Architecture Coexploration
    for Video Over Wireless Applications},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {15:1--15:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180892},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In this article, we propose a cross-layer
    algorithm/architecture coexploration for wireless
    multimedia systems to coordinate interactions among
    sublayer optimizers for improvements in
    energy/QoS/reliability. By exploiting the inherent
    redundancy in wireless multimedia systems, we generate
    an expanded design space over traditional
    layer-specific approaches. Specifically, we control the
    error resilient encoder at the application layer to
    provide awareness of architectural exploration at the
    physical layer allowing new design points with lower
    power consumption via aggressive voltage scaling. While
    trying to reduce energy consumption, the fault tolerant
    technique compensates the effect of the hardware and
    network errors due to aggressive voltage scaling and
    lossy transmission, respectively. Our experiments on
    H.263 video over a WCDMA communication system
    demonstrate that coexploration enlarges the feasible
    design space, which results in significant power
    savings of more than 20\% in the WCDMA modem.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {15},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Salamy:2012:SOT,
  author = {Hassan Salamy and J. Ramanujam},
  title = {Storage Optimization through Offset Assignment with
    Variable Coalescing},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {16:1--16:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180893},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Most modern digital signal processors (DSPs) provide
    multiple address registers and a dedicated address
    generation unit (AGU) which performs address generation
    in parallel to instruction execution. There is no
    address computation overhead if the next address is
    within the auto-modify range. A careful placement of
    variables in memory is utilized to decrease the number
    of address arithmetic instructions and thus to generate
    compact and efficient code. The simple offset
    assignment (SOA) problem concerns the layout of
    variables for machines with one address register and
    the general offset assignment (GOA) deals with multiple
    address registers. Both these problems assume that each
    variable needs to be allocated for the entire duration
    of a program. Both SOA and GOA are NP-complete. In this
    article, we present effective heuristics for the simple
    and the general offset assignment problems with
    variable coalescing where two or more non-interfering
    variables can be mapped into the same memory location.
    Results on several benchmarks show the significant
    improvement of our proposed heuristics compared to
    other heuristics in the literature.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {16},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Falk:2012:ISS,
  author = {Heiko Falk and Peter Marwedel},
  title = {Introduction to the {Special Section on SCOPES'09}},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {17:1--17:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180894},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {17},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Kim:2012:FLF,
  author = {Jaegeuk Kim and Hyotaek Shim and Seon-Yeong Park and
    Seungryoul Maeng and Jin-Soo Kim},
  title = {{FlashLight}: a Lightweight Flash File System for
    Embedded Systems},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {18:1--18:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180895},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {A very promising approach for using NAND flash memory
    as a storage medium is a flash file system. In order to
    design a higher-performance flash file system, two
    issues should be considered carefully. One issue is the
    design of an efficient index structure that contains
    the locations of both files and data in the flash
    memory. For large-capacity storage, the index structure
    must be stored in the flash memory to realize low
    memory consumption; however, this may degrade the
    system performance. The other issue is the design of a
    novel garbage collection (GC) scheme that reclaims
    obsolete pages. This scheme can induce considerable
    additional read and write operations while identifying
    and migrating valid pages. In this article, we present
    a novel flash file system that has the following
    features: (i) a lightweight index structure that
    introduces the hybrid indexing scheme and intra-inode
    index logging, and (ii) an efficient GC scheme that
    adopts a dirty list with an on-demand GC approach as
    well as fine-grained data separation and erase-unit
    data allocation. We implemented FlashLight in a Linux
    OS with kernel version 2.6.21 on an embedded device.
    The experimental results obtained using several
    benchmark programs confirm that FlashLight improves the
    performance by up to 27.4\% over UBIFS by alleviating
    index management and GC overheads by up to 33.8\%.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {18},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Eriksson:2012:ICG,
author = "Mattias Eriksson and Christoph Kessler",
title = "Integrated Code Generation for Loops",
journal = j-TECS,
volume = "11S",
number = "1",
pages = "19:1--19:??",
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2180887.2180896",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 7 16:18:52 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Code generation in a compiler is commonly divided into
several phases: instruction selection, scheduling,
register allocation, spill code generation, and, in the
case of clustered architectures, cluster assignment.
These phases are interdependent; for instance, a
decision in the instruction selection phase affects how
an operation can be scheduled. We examine the effect of
this separation of phases on the quality of the
generated code. To study this we have formulated
optimal methods for code generation with integer linear
programming; first for acyclic code and then we extend
this method to modulo scheduling of loops. In our
experiments we compare optimal modulo scheduling, where
all phases are integrated, to modulo scheduling, where
instruction selection and cluster assignment are done
in a separate phase. The results show that, for an
architecture with two clusters, the integrated method
finds a better solution than the nonintegrated method
for 27\% of the instances.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Murray:2012:ASL,
  author = {Alastair Murray and Bj{\"o}rn Franke},
  title = {Adaptive Source-Level Data Assignment to Dual Memory
    Banks},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {20:1--20:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180897},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Dual memory banks provide extra memory bandwidth to
    DSP applications and enable simultaneous access to two
    operands if the data is partitioned appropriately.
    Fully automated and compiler integrated approaches to
    data partitioning and memory bank assignment have,
    however, found little acceptance by DSP software
    developers. In this article we present a novel
    source-level approach that is more programmer friendly.
    Our scheme is based on soft graph coloring and highly
    adaptive heuristics generated by genetic programming.
    We have evaluated our scheme on an Analog Devices
    TigerSHARC TS-101 DSP and achieved speedups of up to
    57\% on 13 UTDSP benchmarks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {20},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Boissinot:2012:SPR,
  author = {Benoit Boissinot and Philip Brisk and Alain Darte and
    Fabrice Rastello},
  title = {{SSI} Properties Revisited},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {21:1--21:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180898},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The static single information (SSI) form is an
    extension of the static single assignment (SSA) form, a
    well-established compiler intermediate representation
    that has been successfully used for numerous compiler
    analysis and optimizations. Several interesting results
    have also been shown for SSI form concerning liveness
    analysis and the representation of live-ranges of
    variables, which could make SSI form appealing for
    just-in-time compilation. Unfortunately, we have
    uncovered several mistakes in the previous literature
    on SSI form, which, admittedly, is already quite
    sparse. This article corrects the mistakes that are
    most germane to SSI form. We first explain why the two
    definitions of SSI form proposed in past literature,
    first by C. S. Ananian, then by J. Singer, are not
    equivalent. Our main result is then to prove that basic
    blocks, and thus program points, can be totally ordered
    so that live-ranges of variables correspond to
    intervals on a line, a result that holds for both
    variants of SSI form. In other words, in SSI form, the
    intersection graph defined by live-ranges is an
    interval graph, a stronger structural property than for
    SSA form for which the intersection graph of
    live-ranges is chordal. Finally, we show how this
    structure of live-ranges can be used to simplify
    liveness analysis.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {21},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Franke:2012:SPM,
  author = {Bj{\"o}rn Franke},
  title = {Statistical Performance Modeling in Functional
    Instruction Set Simulators},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {22:1--22:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180899},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Despite the recent progress in improving the speed of
    instruction-accurate simulators cycle-accurate
    simulation is still prohibitively slow for all but the
    most basic programs. In this article we present a
    statistical machine learning approach to performance
    estimation in fast, instruction accurate simulators and
    evaluate our methodology comprehensively against three
    popular embedded RISC processors and about 300 embedded
    applications. We show that our methodology is capable
    of providing accurate performance estimations with an
    average error of less than 3.9\% while, on average,
    operating $ \approx 14.5 $ times faster than
    cycle-accurate simulation.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {22},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Chandraiah:2012:CAR,
  author = {Pramod Chandraiah and Rainer D{\"o}mer},
  title = {Computer-Aided Recoding to Create Structured and
    Analyzable System Models},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {23:1--23:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180900},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In embedded system design, the quality of the input
    model has a direct bearing on the effectiveness of the
    system exploration and synthesis tools. Given a
    well-written system model, tools today are effective in
    generating working implementations. However, readily
    available C reference code is not conducive for
    immediate system synthesis as it lacks needed features
    for automatic analysis and synthesis. Among others, the
    lack of proper structure and the presence of
    intractable pointers in the reference code are factors
    that seriously hamper the effectiveness of system
    design tools. To overcome these deficiencies, we aim to
    automate the conversion of flat C code into a
    well-structured system model by applying automated
    source code transformations. We present a set of
    computer-aided recoding operations that enable the
    system designer to mitigate pointer problems and
    quickly create the necessary structural hierarchy so
    that the design model becomes easily analyzable and
    synthesizable. Utilizing the designer's knowledge, our
    interactive recoding transformations aid the designer
    in efficiently creating well-structured system models
    for rapid design space exploration and successful
    synthesis. Our estimated and measured experimental
    results show significant productivity gains through a
    substantial reduction of the model creation time.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {23},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Dubach:2012:EPE,
  author = {Christophe Dubach and Timothy M. Jones and Michael F.
    P. O'Boyle},
  title = {Exploring and Predicting the Effects of
    Microarchitectural Parameters and Compiler
    Optimizations on Performance and Energy},
  journal = j-TECS,
  volume = {11S},
  number = {1},
  pages = {24:1--24:??},
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2180887.2180901},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 7 16:18:52 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Embedded processor performance is dependent on both
    the underlying architecture and the compiler
    optimizations applied. However, designing both
    simultaneously is extremely difficult to achieve due to
    the time constraints designers must work under.
    Therefore, current methodology involves designing
    compiler and architecture in isolation, leading to
    suboptimal performance of the final product. This
    article develops a novel approach to this codesign
    space problem. For our specific design space, we
    demonstrate that we can automatically predict the
    performance that an optimizing compiler would achieve
    without actually tuning it for any of the
    microarchitecture configurations considered. Once
    trained, a single run of the program compiled with the
    standard optimization setting is enough to make a
    prediction on the new microarchitecture with just a
    3.2\% error rate on average. This allows the designer
    to accurately choose an architectural configuration
    with knowledge of how an optimizing compiler will
    perform on it. We use this to find the best optimizing
    compiler/architectural configuration in our codesign
    space and demonstrate that it achieves an average 19\%
    performance improvement and energy savings of 16\%
    compared to the baseline, nearly doubling the
    energy-efficiency measured as the energy-delay-squared
    product (EDD).},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {24},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Staff:2012:APA,
  author = {{TECS Staff}},
  title = {Abstracts of Papers to appear in {Special Supplemental
    Issue of TECS (v11, iSupplemental1)}},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {25:1--25:??},
  month = jul,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2220336.2220337},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In order to speed up the publication process, we have
    begun to publish supplemental online-only issues. The
    following abstracts describe the articles in the first
    such issue, Vol. 11S(1). These articles are available
    in the Digital Library.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {25},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Lee:2012:PPI,
  author = {Jongeun Lee and Aviral Shrivastava},
  title = {{PICA}: {Processor Idle Cycle Aggregation} for
    Energy-Efficient Embedded Systems},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {26:1--26:??},
  month = jul,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2220336.2220338},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Processor Idle Cycle Aggregation (PICA) is a promising
    approach for low-power execution of processors, in
    which small memory stalls are aggregated to create
    large ones, enabling profitable switch of the processor
    into low-power mode. We extend the previous approach in
    three dimensions. First we develop static analysis for
    the PICA technique and present optimal parameters for
    five common types of loops based on steady-state
    analysis. Second, to remedy the weakness of
    software-only control in varying environment, we
    enhance PICA with minimal hardware extension that
    ensures correct execution for any loops and parameters,
    thus greatly facilitating exploration-based parameter
    tuning. Third, we demonstrate that our PICA technique
    can be applied to certain types of nested loops with
    variable bounds, thus enhancing the applicability of
    PICA. We validate our analytical model against
    simulation-based optimization and also show, through
    our experiments on embedded application benchmarks,
    that our technique can be applied to a wide range of
    loops with average 20\% energy reductions, compared to
    executions without PICA.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {26},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{McIntire:2012:EES,
  author = {Dustin McIntire and Thanos Stathopoulos and Sasank
    Reddy and Thomas Schmidt and William J. Kaiser},
  title = {Energy-Efficient Sensing with the {Low Power, Energy
    Aware Processing} ({LEAP}) Architecture},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {27:1--27:??},
  month = jul,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2220336.2220339},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {A broad range of embedded networked sensing (ENS)
    applications have appeared for large-scale systems,
    introducing new requirements leading to new embedded
    architectures, associated algorithms, and supporting
    software systems. These new requirements include the
    need for diverse and complex sensor systems that
    present demands for energy and computational resources,
    as well as for broadband communication. To satisfy
    application demands while maintaining critical support
    for low-energy operation, a new multiprocessor node
    hardware and software architecture, Low Power Energy
    Aware Processing (LEAP), has been developed. In this
    article, we described the LEAP design approach, in
    which the system is able to adaptively select the most
    energy-efficient hardware components matching an
    application's needs. The LEAP platform supports highly
    dynamic requirements in sensing fidelity, computational
    load, storage media, and network bandwidth. It focuses
    on episodic operation of each component and considers
    the energy dissipation for each platform task by
    integrating fine-grained energy-dissipation monitoring
    and sophisticated power-control scheduling for all
    subsystems, including sensors. In addition to the LEAP
    platform's unique hardware capabilities, its software
    architecture has been designed to provide an easy way
    to use power management interface and a robust,
    fault-tolerant operating environment and to enable
    remote upgrade of all software components. LEAP
    platform capabilities are demonstrated by example
    implementations, such as a network protocol design and
    a light source event detection algorithm. Through the
    use of a distributed node testbed, we demonstrate that
    by exploiting high energy-efficiency components and
    enabling proper on-demand scheduling, the LEAP
    architecture may meet both sensing performance and
    energy dissipation objectives for a broad class of
    applications.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {27},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Wang:2012:DCR,
  author = {Weixun Wang and Prabhat Mishra and Ann Gordon-Ross},
  title = {Dynamic Cache Reconfiguration for Soft Real-Time
    Systems},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {28:1--28:??},
  month = jul,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2220336.2220340},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In recent years, efficient dynamic reconfiguration
    techniques have been widely employed for system
    optimization. Dynamic cache reconfiguration is a
    promising approach for reducing energy consumption as
    well as for improving overall system performance. It is
    a major challenge to introduce cache reconfiguration
    into real-time multitasking systems, since dynamic
    analysis may adversely affect tasks with timing
    constraints. This article presents a novel approach for
    implementing cache reconfiguration in soft real-time
    systems by efficiently leveraging static analysis
    during runtime to minimize energy while maintaining the
    same service level. To the best of our knowledge, this
    is the first attempt to integrate dynamic cache
    reconfiguration in real-time scheduling techniques. Our
    experimental results using a wide variety of
    applications have demonstrated that our approach can
    significantly reduce the cache energy consumption in
    soft real-time systems (up to 74\%).},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {28},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Palermo:2012:VAR,
author = {Gianluca Palermo and Cristina Silvano and Vittorio
Zaccaria},
title = {A Variability-Aware Robust Design Space Exploration
Methodology for On-Chip Multiprocessors Subject to
Application-Specific Constraints},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {29},
pages = {29:1--29:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220341},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Manufacturing process variation is dramatically
becoming one of the most important challenges related
to power and performance optimization for sub-90nm CMOS
technologies. Process variability impacts the
optimization of the target system metrics, that is,
performance and energy consumption by introducing
fluctuations and unpredictability. Besides, it impacts
the parametric yield of the chip with respect to
application level constraints by reducing the number of
devices working within normal operating conditions. The
impact of variability on systems with stringent
application-specific requirements (such as portable
multimedia and critical embedded systems) is much
greater than on general-purpose systems given the
emphasis on predictability and reduced operating
margins. In this market segment, failing to address
such a problem within the early design stages of the
chip may lead to missing market deadlines and suffering
greater economic losses. In the context of a design
space exploration framework for supporting the
platform-based design approach, we address the problem
of robustness with respect to manufacturing process
variations. First, we apply Response Surface Modeling
(RSM) techniques to enable an efficient evaluation of
the statistical measures of execution time and energy
consumption for each system configuration. Then, we
apply a robust design space exploration framework to
afford the problem of the impact of manufacturing
process variations onto the system-level metrics and
consequently onto the application-level constraints. We
finally provide a comparison of our design space
exploration technique with conventional approaches on
two different case studies.},
}
@article{Yang:2012:UEP,
author = {Yoon Seok Yang and Gwan Choi},
title = {Unequal Error Protection Based on {DVFS} for {JSCD} in
Low-Power Portable Multimedia Systems},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {30},
pages = {30:1--30:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220342},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {This article presents a low-power decoder design for
joint source-channel decoding (JSCD) based on a novel
unequal error protection (UEP) scheme over additive
white Gaussian noise (AWGN) channels. Conventional JSCD
schemes, adopting low-density parity check (LDPC) codes
for multimedia devices, typically operate at a
fixed-time decoding loop, regardless of the quality of
data received. We present a JSCD scheme that achieves
reduction in power through minimum energy decoding and
dynamic voltage and frequency scaling (DVFS).
Consequently, up to 39\% power reduction is achieved in
Foreman, Akiyo, and Mobile video streams without
performance degradation in reconstructed video
quality.},
}
@article{Namin:2012:EFF,
author = {Ashkan Hosseinzadeh Namin and Huapeng Wu and Majid
Ahmadi},
title = {An Efficient Finite Field Multiplier Using Redundant
Representation},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {31},
pages = {31:1--31:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220343},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {An efficient word-level finite field multiplier using
redundant representation is proposed. The proposed
multiplier has a significantly higher speed, compared
to previously proposed word-level architectures using
either redundant representation or optimal normal basis
type I, at the expense of moderately higher area
complexity. Furthermore, the new design out-performs
other similar proposals when considering the product of
area and delay as a measure of performance. ASIC
Realization of the proposed design using TSMC's 0.18 $
\mu $ m CMOS technology for the binary field size of
163 is also presented.},
}
@article{Leyva-del-Foyo:2012:ITI,
author = {Luis E. Leyva-del-Foyo and Pedro Mejia-Alvarez and
Dionisio de Niz},
title = {Integrated Task and Interrupt Management for Real-Time
Systems},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {32},
pages = {32:1--32:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220344},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Real-time scheduling algorithms like RMA or EDF and
their corresponding schedulability test have proven to
be powerful tools for developing predictable real-time
systems. However, the traditional interrupt management
model presents multiple inconsistencies that break the
assumptions of many of the real-time scheduling tests,
diminishing its utility. In this article, we analyze
these inconsistencies and present a model that resolves
them by integrating interrupts and tasks in a single
scheduling model. We then use the RMA theory to
calculate the cost of the model and analyze the
circumstances under which it can provide the most
value. This model was implemented in a kernel module.
The portability of the design of our module is
discussed in terms of its independence from both the
hardware and the kernel. We also discuss the
implementation issues of the model over conventional PC
hardware, along with its cost and novel optimizations
for reducing the overhead. Finally, we present our
experimental evaluation to show evidence of its
temporal determinism and overhead.},
}
@article{Garg:2012:IMP,
author = {Siddharth Garg and Diana Marculescu},
title = {On the Impact of Manufacturing Process Variations on
the Lifetime of Sensor Networks},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {33},
pages = {33:1--33:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220345},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {The lifetime of individual nodes in a sensor network
depends strongly on the leakage power of the nodes in
idle state. With technology scaling, variability in
leakage power dissipation of sensor nodes will cause
increased variability in their lifetimes. In this
article, we analyze how the lifetime variations of
sensor nodes affect the performance of the sensor
network as a whole. We demonstrate the use of the
proposed framework to explore deployment cost versus
performance trade-offs for sensor networks. Results
indicate that up to 37\% improvement in the critical
lifetime of a sensor network can be obtained with a
20\% increase in deployment cost.},
}
@article{Blech:2012:GIB,
author = {Jan Olaf Blech and Micha{\"e}l P{\'e}rin},
title = {Generating Invariant-Based Certificates for Embedded
Systems},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {34},
pages = {34:1--34:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220346},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Automatic verification tools, such as model checkers
and tools based on static analysis or on abstract
interpretation, have become popular in software and
hardware development. They increase confidence and
potentially provide rich feedback. However, with
increasing complexity, verification tools themselves
are more likely to contain errors. In contrast to
automatic verification tools, higher-order theorem
provers use mathematically founded proof strategies
checked by a small proof checker to guarantee selected
properties. Thus, they enjoy a high level of
trustability. Properties of software and hardware
systems and their justifications can be encapsulated
into a certificate, thereby guaranteeing correctness of
the systems, with respect to the properties. These
results offer a much higher degree of confidence than
results achieved by verification tools. However,
higher-order theorem provers are usually slow, due to
their general and minimalistic nature. Even for small
systems, a lot of human interaction is required for
establishing a certificate. In this work, we combine
the advantages of automatic verification tools (i.e.,
speed and automation) with those of higher-order
theorem provers (i.e., high level of trustability). The
verification tool generates a certificate for each
invocation. This is checked by the higher-order theorem
prover, thereby guaranteeing the desired property. The
generation of certificates is much easier than
producing the analysis results of the verification tool
in the first place. In our work, we are able to create
certificates that come with an algorithmic description
of the proof of the desired property as justification.
We concentrate on verification tools that generate
invariants of systems and certify automatically that
these do indeed hold. Our approach is applied to the
certification of the verdicts of a deadlock-detection
tool for an asynchronous component-based language.},
}
@article{Jeong:2012:PLT,
author = {Jaein Jeong and David Culler},
title = {Predicting the Long-Term Behavior of a Micro-Solar
Power System},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {35},
pages = {35:1--35:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220347},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Micro-solar power system design is challenging because
it must address long-term system behavior under highly
variable solar energy conditions and consider a large
space of design options. Several micro-solar power
systems and models have been made, validating
particular points in the whole design space. We provide
a general architecture of micro-solar power
systems---comprising key components and
interconnections among the components---and formalize
each component in an analytical or empirical model of
its behavior. To model the variability of solar energy,
we provide three solar radiation models, depending on
the degree of information available: an astronomical
model for ideal conditions, an obstructed astronomical
model for estimating solar radiation under the presence
of shadows and obstructions, and a weather-effect model
for estimating solar radiation under weather variation.
Our solar radiation models are validated with a
concrete design, the HydroWatch node, thus achieving
small deviation from the long-term measurement. They
can be used in combination with other micro-solar
system models to improve the utility of the load and
estimate the behavior of micro-solar power systems more
accurately. Thus, our solar radiation models provide
more accurate estimations of solar radiation and close
the loop for micro-solar power system modeling.},
}
@article{Smith:2012:OSH,
author = {Melissa C. Smith and Gregory D. Peterson},
title = {Optimization of Shared High-Performance Reconfigurable
Computing Resources},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {36},
pages = {36:1--36:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220348},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {In the field of high-performance computing, systems
harboring reconfigurable devices, such as
field-programmable gate arrays (FPGAs), are gaining
more widespread interest. Such systems range from
supercomputers with tightly coupled reconfigurable
hardware to clusters with reconfigurable devices at
each node. The use of these architectures for
scientific computing provides an alternative for
computationally demanding problems and has advantages
in metrics, such as operating cost/performance and
power/performance. However, performance optimization of
these systems can be challenging even with knowledge of
the system's characteristics. Our analytic performance
model includes parameters representing the
reconfigurable hardware, application load imbalance
across the nodes, background user load, basic
message-passing communication, and processor
heterogeneity. In this article, we provide an overview
of the analytical model and demonstrate its application
for optimization and scheduling of high-performance
reconfigurable computing (HPRC) resources. We examine
cost functions for minimum runtime and other
optimization problems commonly found in shared
computing resources. Finally, we discuss additional
scheduling issues and other potential applications of
the model.},
}
@article{Lee:2012:EEA,
author = {Kyoungwoo Lee and Nikil Dutt and Nalini
Venkatasubramanian},
title = {{EAVE}: {Error-Aware Video Encoding} Supporting
Extended Energy\slash {QoS} Trade-offs for Mobile
Embedded Systems},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {37},
pages = {37:1--37:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220349},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Energy/QoS provisioning is challenging for video
applications over lossy wireless network with
power-constrained mobile handheld devices. In this
work, we exploit the inherent error tolerance of video
data to generate a range of acceptable operating points
by controlling the amount of errors in the system. In
particular, we propose an error-aware video encoding
technique, EAVE, that intentionally injects errors
while ensuring acceptable QoS. The expanded trade-off
space generated by EAVE allows system designers to
comparatively evaluate different operating points with
varying QoS and energy consumption by aggressively
exploiting error-resilience attributes, and could
potentially result in significant energy savings. The
novelty of our approach resides in active exploitation
of errors to vary the operating conditions for further
optimization of system parameters. Moreover, we present
the adaptivity of our approach by incorporating the
feedback from the decoding side to achieve the QoS
requirement under the dynamic network status. Our
experiments show that EAVE can reduce the energy
consumption for an encoding device by up to 37\% for a
video conferencing application over a wireless network
without quality degradation, compared to a standard
video encoding technique over test video streams.
Further, our experimental results demonstrate that EAVE
can expand the design space by 14 times with respect to
energy consumption and by 13 times with respect to
video quality (compared to a traditional approach
without active error exploitation) on average, over
test video streams.},
}
@article{Chen:2012:ART,
author = {Mingsong Chen and Prabhat Mishra and Dhrubajyoti
Kalita},
title = {Automatic {RTL} Test Generation from {SystemC TLM}
Specifications},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {2},
articleno = {38},
pages = {38:1--38:??},
year = {2012},
month = jul,
doi = {https://doi.org/10.1145/2220336.2220350},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Fri Jul 27 18:57:33 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {SystemC transaction-level modeling (TLM) is widely
used to enable early exploration for both hardware and
software designs. It can reduce the overall design and
validation effort of complex system-on-chip (SOC)
architectures. However, due to lack of automated
techniques coupled with limited reuse of validation
efforts between abstraction levels, SOC validation is
becoming a major bottleneck. This article presents a
novel top-down methodology for automatically generating
register transfer-level (RTL) tests from SystemC TLM
specifications. It makes two important contributions:
(i) it proposes a method that can automatically
generate TLM tests using various coverage metrics, and
(ii) it develops a test refinement specification for
automatically converting TLM tests to RTL tests in
order to reduce overall validation effort. We have
developed a tool which incorporates these activities to
enable automated RTL test generation from SystemC TLM
specifications. Case studies using a router example and
a 64-bit Alpha AXP pipelined processor demonstrate that
our approach can achieve intended functional coverage
of the RTL designs, as well as capture various
functional errors and inconsistencies between
specifications and implementations.},
}
@article{Plaks:2012:ESS,
author = {Toomas P. Plaks},
title = {Editorial: Special Section on {CAPA'09}},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {S2},
articleno = {39},
pages = {39:1--39:??},
year = {2012},
doi = {https://doi.org/10.1145/2331147.2331148},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Thu Sep 6 09:57:10 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
}
@article{Paul:2012:PRC,
author = {Anand Paul and Yung-Chuan Jiang and Jhing-Fa Wang and
Jar-Ferr Yang},
title = {Parallel Reconfigurable Computing-Based Mapping
Algorithm for Motion Estimation in Advanced Video
Coding},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {S2},
articleno = {40},
pages = {40:1--40:??},
year = {2012},
doi = {https://doi.org/10.1145/2331147.2331149},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Thu Sep 6 09:57:10 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Computational load of motion estimation in advanced
video coding (AVC) standard is significantly high and
even worse for HDTV and super-resolution sequences. In
this article, a video processing algorithm is
dynamically mapped onto a new parallel reconfigurable
computing (PRC) architecture which consists of multiple
dynamic reconfigurable computing (DRC) units. First, we
construct a directed acyclic graph (DAG) to represent
video coding algorithms in which motion estimation is
the focus. A novel parallel partition approach is then
proposed to map motion estimation DAG onto the multiple
DRC units in a PRC system. This partitioning algorithm
is capable of design optimization of parallel
processing reconfigurable systems for a given number of
processing elements in different search ranges. This
speeds up the video processing with minimum
sacrifice.},
}
@article{Suris:2012:RSC,
author = {Jorge A. Sur{\'\i}s and Adolfo Recio and Peter
Athanas},
title = {{RapidRadio}: Signal Classification and Radio
Deployment Framework},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {S2},
articleno = {41},
pages = {41:1--41:??},
year = {2012},
doi = {https://doi.org/10.1145/2331147.2331151},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Thu Sep 6 09:57:10 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {In this article, the RapidRadio framework for signal
classification and receiver deployment is discussed.
The framework is a productivity-enhancing tool that
reduces the required knowledge base for implementing a
receiver on an FPGA-based SDR platform. The ultimate
objective of this framework is to identify unknown
signals and to build FPGA-based receivers capable of
receiving them. RapidRadio divides the process of radio
creation into two phases; the analysis phase and radio
synthesis phase. The analysis phase guides the user
through the process of classifying an unknown signal
and determining its modulation scheme and parameters,
resulting in a radio receiver model. In the second
phase, this model is transformed into a functional
receiver in an FPGA-based platform.},
}
@article{Mark:2012:HBC,
author = {Cindy Mark and Scott Y. L. Chin and Lesley Shannon and
Steven J. E. Wilton},
title = {Hierarchical Benchmark Circuit Generation for {FPGA}
Architecture Evaluation},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {S2},
articleno = {42},
pages = {42:1--42:??},
year = {2012},
doi = {https://doi.org/10.1145/2331147.2331152},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Thu Sep 6 09:57:10 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {We describe a stochastic circuit generator that can be
used to automatically create benchmark circuits for use
in FPGA architecture studies. The circuits consist of a
hierarchy of interconnected modules, reflecting the
structure of circuits designed using a system-on-chip
design flow. Within each level of hierarchy, modules
can be connected in a bus, star, or dataflow
configuration. Our circuit generator is calibrated
based on a careful study of existing system-on-chip
circuits. We show that our benchmark circuits lead to
more realistic architectural conclusions than circuits
generated using previous generators.},
}
@article{Reardon:2012:REE,
author = {Casey Reardon and Brian Holland and Alan D. George and
Greg Stitt and Herman Lam},
title = {{RCML}: An Environment for Estimation Modeling of
Reconfigurable Computing Systems},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {S2},
articleno = {43},
pages = {43:1--43:??},
year = {2012},
doi = {https://doi.org/10.1145/2331147.2331153},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Thu Sep 6 09:57:10 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Reconfigurable computing (RC) is emerging as a
promising area for embedded computing, in which complex
systems must balance performance, flexibility, cost,
and power. The difficulty associated with RC
development suggests improved strategic planning and
analysis techniques can save significant development
time and effort. This article presents a new abstract
modeling language and environment, the RC Modeling
Language (RCML), to facilitate efficient design space
exploration of RC systems at the estimation modeling
level, that is, before building a functional
implementation. Two integrated analysis tools and case
studies, one analytical and one simulative, are
presented illustrating relatively accurate automated
analysis of systems modeled in RCML.},
}
%%% TECS volume 11, number S2, article 44.  The surname "Di Biagio" is
%%% braced so that BibTeX treats the two words as a single last name.
@Article{DiBiagio:2012:AOA,
author = "Andrea {Di Biagio} and Giovanni Agosta and Martino
Sykora and Cristina Silvano",
title = "Architecture Optimization of Application-Specific
Implicit Instructions",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "44:1--44:??",
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331154",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Dynamic configuration of application-specific implicit
instructions has been proposed to better exploit the
available parallelism at the instruction level in
pipelined processors. The support of such implicit
instruction issue requires the pipeline to be extended
with a trigger table that describes the instruction
implicitly issued as a response to a value written into
a triggering register by a triggering instruction
(which may be an add or sub instruction). In this
article, we explore the design optimization of the
trigger table to maximize the number of instructions
that can be implicitly issued while keeping the limited
size of the trigger table. The concept of implicitly
issued instruction has been formally defined by
considering the inter-basic block analysis of control
and data dependencies. A compilation tool chain has
been developed to automatically identify the
optimization opportunities, taking into account the
constraints imposed by control and data dependencies as
well as by architectural limitations. The proposed
solutions have been applied to the case of a baseline
scalar MIPS processor where, for the selected set of
benchmarks (DSPStone and Mibench/automotive), we
obtained an average speedup of 17\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% NOTE(review): the first author's surname is believed to be
%%% "Nahapetian"; confirm against the publisher record for the DOI below.
%%% The citation key deliberately keeps the earlier spelling so that
%%% existing citations of this entry continue to resolve.
@Article{Napapetian:2012:ESS,
author = "Ani Nahapetian and William Kaiser and Majid
Sarrafzadeh",
title = "Editorial: Special Section on {WHS'09}",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "45:1--45:??",
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331155",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Guenterberg:2012:ASR,
author = {Eric Guenterberg and Hassan Ghasemzadeh and Roozbeh
Jafari},
title = {Automatic Segmentation and Recognition in Body Sensor
Networks Using a Hidden {Markov} Model},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {S2},
articleno = {46},
pages = {46:1--46:??},
year = {2012},
doi = {https://doi.org/10.1145/2331147.2331156},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Thu Sep 6 09:57:10 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {One important application of body sensor networks is
action recognition. Action recognition often implicitly
requires partitioning sensor data into intervals, then
labeling the partitions according to the action that
each represents or as a non-action. The temporal
partitioning stage is called segmentation, and the
labeling is called classification. While many effective
methods exist for classification, segmentation remains
problematic. We present a technique inspired by
continuous speech recognition that combines
segmentation and classification using hidden Markov
models. This technique is distributed across several
sensor nodes. We show the results of this technique and
the bandwidth savings over full data transmission.},
}
@article{Pradhan:2012:AVJ,
author = {Gaurav N. Pradhan and B. Prabhakaran},
title = {Analyzing and Visualizing Jump Performance Using
Wireless Body Sensors},
journal = j-TECS,
fjournal = {ACM Transactions on Embedded Computing Systems},
ajournal = {ACM Trans. Embed. Comput. Syst.},
journal-url = {https://dl.acm.org/loi/tecs},
volume = {11},
number = {S2},
articleno = {47},
pages = {47:1--47:??},
year = {2012},
doi = {https://doi.org/10.1145/2331147.2331157},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
bibdate = {Thu Sep 6 09:57:10 MDT 2012},
bibsource = {http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Advancement in technology has led to the deployment of
body sensor networks (BSN) to monitor and sense human
activity in pervasive environments. Using multiple
wireless on-body systems, such as physiological data
monitoring and motion capture systems, body sensor
network data consists of heterogeneous physiologic and
motoric streams that form a multidimensional framework.
In this article, we analyze such high-dimensional body
sensor network data by proposing an efficient,
multidimensional factor analysis technique for
quantifying human performance and, at the same time,
providing visualization for performances of
participants in a low-dimensional space for easier
interpretation.},
}
@Article{Thatte:2012:KEE,
author = "Gautam Thatte and Ming Li and Sangwon Lee and Adar
Emken and Shrikanth Narayanan and Urbashi Mitra and
Donna Spruijt-Metz and Murali Annavaram",
title = "{KNOWME}: An Energy-Efficient Multimodal Body Area
Network for Physical Activity Monitoring",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "48:1--48:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331158",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The use of biometric sensors for monitoring an
individual's health and related behaviors, continuously
and in real time, promises to revolutionize healthcare
in the near future. In an effort to better understand
the complex interplay between one's medical condition
and social, environmental, and metabolic parameters,
this article presents the KNOWME platform, a complete,
end-to-end, body area sensing system that integrates
off-the-shelf biometric sensors with a Nokia N95 mobile
phone to continuously monitor the metabolic signals of
a subject. With a current focus on pediatric obesity,
KNOWME employs metabolic signals to monitor and
evaluate physical activity. KNOWME development and
in-lab deployment studies have revealed three major
challenges: (1) the need for robustness to highly
varying operating environments due to subject-induced
variability, such as mobility or sensor placement; (2)
balancing the tension between achieving high fidelity
data collection and minimizing network energy
consumption; and (3) accurate physical activity
detection using a modest number of sensors. The KNOWME
platform described herein directly addresses these
three challenges. Design robustness is achieved by
creating a three-tiered sensor data collection
architecture. The system architecture is designed to
provide robust, continuous, multichannel data
collection and scales without compromising normal
mobile device operation. Novel physical activity
detection methods which exploit new representations of
sensor signals provide accurate and efficient physical
activity detection. The physical activity detection
method employs personalized training phases and
accounts for intersession variability. Finally,
exploiting the features of the hardware implementation,
a low-complexity sensor sampling algorithm is
developed, resulting in significant energy savings
without loss of performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Banerjee:2012:BAT,
author = "Ayan Banerjee and Sailesh Kandula and Tridib Mukherjee
and Sandeep K. S. Gupta",
title = "{BAND-AiDe}: a Tool for Cyber-Physical Oriented
Analysis and Design of Body Area Networks and Devices",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "49:1--49:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331159",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Body area networks (BANs) are networks of medical
devices implanted within or worn on the human body.
Analysis and verification of BAN designs require (i)
early feedback on the BAN design and (ii)
high-confidence evaluation of BANs without requiring
any hazardous, intrusive, and costly deployment. Any
design of BAN further has to ensure (i) the safety of
the human body, that is, limiting any undesirable
side-effects (e.g., heat dissipation) of BAN operations
(involving sensing, computation, and communication
among the devices) on the human body, and (ii) the
sustainability of the BAN operations, that is, the
continuation of the operations under constrained
resources (e.g., limited battery power in the devices)
without requiring any redeployments. This article uses
the Model Based Engineering (MBE) approach to perform
design and analysis of BANs. In this regard, first, an
abstract cyber-physical model of BANs, called BAN-CPS,
is proposed that captures the undesirable side-effects
of the medical devices (cyber) on the human body
(physical); second, a design and analysis tool, named
BAND-AiDe, is developed that allows specification of
BAN-CPS using industry standard Abstract Architecture
Description Language (AADL) and enables safety and
sustainability analysis of BANs; and third, the
applicability of BAND-AiDe is shown through a case
study using both single and a network of medical
devices for health monitoring applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "49",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hanson:2012:AFE,
author = "Mark A. Hanson and Harry C. {Powell, Jr.} and Adam T.
Barth and John Lach",
title = "Application-Focused Energy-Fidelity Scalability for
Wireless Motion-Based Health Assessment",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "50:1--50:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331160",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Energy-fidelity trade-offs are central to the
performance of many technologies, but they are
essential in wireless body area sensor networks (BASNs)
due to severe energy and processing constraints and the
critical nature of certain healthcare applications.
On-node signal processing and compression techniques
can save energy by greatly reducing the amount of data
transmitted over the wireless channel, but lossy
techniques, capable of high compression ratios, can
incur a reduction in application fidelity. In order to
maximize system performance, these trade-offs must be
considered at runtime due to the dynamic nature of BASN
applications, including sensed data, operating
environments, user actuation, etc. BASNs therefore
require energy-fidelity scalability, so automated and
user-initiated trade-offs can be made dynamically. This
article presents a data rate scalability framework
within a motion-based health application context which
demonstrates the design of efficient and efficacious
wireless health systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "50",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Boulis:2012:IWC,
author = "Athanassios Boulis and Yuriy Tselishchev and Lavy
Libman and David Smith and Leif Hanlen",
title = "Impact of Wireless Channel Temporal Variation on {MAC}
Design for Body Area Networks",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "51:1--51:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331161",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We investigate the impact of wireless channel temporal
variations on the design of medium access control (MAC)
protocols for body area networks (BANs). Our
measurements-based channel model captures large and
small time-scale signal correlations, giving an
accurate picture of the signal variation, specifically,
the deep fades which are the features that mostly
affect the behavior of the MAC. We test the effect of
the channel model on the performance of the 802.15.4
MAC both in contention access mode and TDMA access
mode. We show that there are considerable differences
in the performance of the MAC compared to simulations
that do not model channel temporal variation.
Furthermore, explaining the behavior of the MAC under a
temporal varying channel, we can suggest specific
design choices for the emerging BAN MAC standard.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "51",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fainekos:2012:ESS,
author = "Georgios Fainekos and Eric Goubault and Franjo
Ivanci{\'c} and Sriram Sankaranarayanan",
title = "Editorial: Special Section {VCPSS'09}",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "52:1--52:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331162",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "52",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wongpiromsarn:2012:VPC,
author = "Tichakorn Wongpiromsarn and Sayan Mitra and Andrew
Lamperski and Richard M. Murray",
title = "Verification of Periodically Controlled Hybrid
Systems: Application to an Autonomous Vehicle",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "53:1--53:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331163",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article introduces Periodically Controlled Hybrid
Automata (PCHA) for modular specification of embedded
control systems. In a PCHA, control actions that change
the control input to the plant occur roughly
periodically, while other actions that update the state
of the controller may occur in the interim. Such
actions could model, for example, sensor updates and
information received from higher-level planning modules
that change the set point of the controller. Based on
periodicity and subtangential conditions, a new
sufficient condition for verifying invariant properties
of PCHAs is presented. For PCHAs with polynomial
continuous vector fields, it is possible to check these
conditions automatically using, for example, quantifier
elimination or sum of squares decomposition. We examine
the feasibility of this automatic approach on a small
example. The proposed technique is also used to
manually verify safety and progress properties of a
fairly complex planner-controller subsystem of an
autonomous ground vehicle. Geometric properties of
planner-generated paths are derived which guarantee
that such paths can be safely followed by the
controller.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "53",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Girard:2012:VSL,
author = "Antoine Girard and Gang Zheng",
title = "Verification of Safety and Liveness Properties of
Metric Transition Systems",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "54:1--54:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331164",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We consider verification problems for transition
systems enriched with a metric structure. We believe
that these metric transition systems are particularly
suitable for the analysis of cyber-physical systems in
which metrics can be naturally defined on the numerical
variables of the embedded software and on the
continuous states of the physical environment. We
consider verification of bounded and unbounded safety
properties, as well as bounded liveness properties. The
transition systems we consider are nondeterministic,
finitely branching, and with a finite set of initial
states. Therefore, bounded safety/liveness properties
can always be verified by exhaustive exploration of the
system trajectories. However, this approach may be
intractable in practice, as the number of trajectories
usually grows exponentially with respect to the
considered bound. Furthermore, since the system we
consider can have an infinite set of states, exhaustive
exploration cannot be used for unbounded safety
verification. For bounded safety properties, we propose
an algorithm which combines exploration of the system
trajectories and state space reduction using merging
based on a bisimulation metric. The main novelty
compared to an algorithm presented recently by Lerda et
al. [2008] consists in introducing a tuning parameter
that improves the performance drastically. We also
establish a procedure that allows us to prove unbounded
safety from the result of the bounded safety algorithm
via a refinement step. We then adapt the algorithm to
handle bounded liveness verification. Finally, the
effectiveness of the approach is demonstrated by
applying it to the analysis of implementations of an
embedded control loop.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "54",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Seshia:2012:QAS,
author = "Sanjit A. Seshia and Alexander Rakhlin",
title = "Quantitative Analysis of Systems Using Game-Theoretic
Learning",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "55:1--55:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331165",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The analysis of quantitative properties, such as
timing and power, is central to the design of reliable
embedded software and systems. However, the
verification of such properties on a program is made
difficult by their heavy dependence on the program's
environment, such as the processor it runs on. Modeling
the environment by hand can be tedious, error prone,
and time consuming. In this article, we present a new
game-theoretic approach to analyzing quantitative
properties that is based on performing systematic
measurements to automatically learn a model of the
environment. We model the problem as a game between our
algorithm (player) and the environment of the program
(adversary) in which the player seeks to accurately
predict the property of interest, while the adversary
sets environment states and parameters. To solve this
problem, we employ a randomized strategy that
repeatedly tests the program along a linear-sized set
of program paths called basis paths, using the
resulting measurements to infer a weighted-graph model
of the environment from which quantitative properties
can be predicted. Test cases are automatically
generated using satisfiability modulo theories (SMT)
solving. We prove that our algorithm can, under certain
assumptions and with arbitrarily high probability,
accurately predict properties such as worst-case
execution time or estimate the distribution of
execution times. Experimental results for execution
time analysis demonstrate that our approach is
efficient, accurate, and highly portable.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "55",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2012:MCB,
author = "Lan Wu and Wei Zhang",
title = "A Model Checking Based Approach to Bounding Worst-Case
Execution Time for Multicore Processors",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "56:1--56:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331166",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "As multicore processors are increasingly adopted in
industry, it has become a great challenge to accurately
bound the worst-case execution time (WCET) for
real-time systems running on multicore chips. This is
particularly true because of the inter-thread
interferences in accessing shared resources on
multicores, such as shared L2 caches, which can
significantly affect the performance but are very
difficult to be estimated statically. This article
proposes an approach to analyzing WCET for multicore
processors with shared L2 instruction caches by using a
model checking based method. We model each concurrent
real-time thread, including the inter-thread cache
interferences with a PROMELA process, and derive the
WCET by using a binary search algorithm. To reduce the
state explosion problem, we propose several techniques
for reducing the memory consumption by exploiting
domain-specific information. Our experiments indicate
that compared to the static analysis technique based on
extended ILP (integer linear programming), our approach
improves the tightness of WCET estimation by more than
31.1\% for the benchmarks we studied. However, due to
the inherent complexity of multicore timing analysis
and the state explosion problem, the model checking
based approach currently can only work with small
real-time kernels for dual-core processors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "56",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tang:2012:UMS,
author = "Qinghui Tang and Sandeep K. S. Gupta and Georgios
Varsamopoulos",
title = "A Unified Methodology for Scheduling in Distributed
Cyber-Physical Systems",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "57:1--57:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331167",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A distributed cyber-physical system (DCPS) may receive
and induce energy-based interference to and from its
environment. This article presents a model and an
associated methodology that can be used to (i) schedule
tasks in DCPSs to ensure that the thermal effects of
the task execution are within acceptable levels, and
(ii) verify that a given schedule meets the
constraints. The model uses coarse discretization of
space and linearity of interference. The methodology
involves characterizing the interference of the task
execution and fitting it into the model, then using the
fitted model to verify a solution or explore the
solution space.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "57",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nghiem:2012:TTI,
author = "Truong Nghiem and George J. Pappas and Rajeev Alur and
Antoine Girard",
title = "Time-Triggered Implementations of Dynamic
Controllers",
journal = j-TECS,
volume = "11",
number = "S2",
pages = "58:1--58:??",
month = aug,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2331147.2331168",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 6 09:57:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Bridging the gap between model-based design and
platform-based implementation is one of the critical
challenges for embedded software systems. In the
context of embedded control systems that interact with
an environment, a variety of errors due to
quantization, delays, and scheduling policies may
generate executable code that does not faithfully
implement the model-based design. In this article, we
show that the performance gap between the model-level
semantics of linear dynamic controllers, for example,
the proportional-integral-derivative (PID) controllers
and their implementation-level semantics, can be
rigorously quantified if the controller implementation
is executed on a predictable time-triggered
architecture. Our technical approach uses lifting
techniques for periodic time-varying linear systems in
order to compute the exact error between the model
semantics and the execution semantics. Explicitly
computing the impact of the implementation on overall
system performance allows us to compare and partially
order different implementations with various scheduling
or timing characteristics.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "58",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dong:2012:UAS,
author = "Qi Dong and Donggang Liu",
title = "Using Auxiliary Sensors for Pairwise Key Establishment
in {WSN}",
journal = j-TECS,
volume = "11",
number = "3",
pages = "59:1--59:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345771",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many techniques have been developed recently for
establishing pairwise keys in sensor networks. However,
some of them are vulnerable to a few compromised sensor
nodes, while others could involve expensive protocols
for establishing keys. This article introduces a much
better alternative that can achieve both high
resilience to node compromises and high efficiency in
key establishment. The main idea is to deploy a small
number of additional sensor nodes, called assisting
nodes, to help key establishment between sensor nodes.
The proposed approach has many advantages over existing
approaches. In particular, a sensor node only needs to
make a few local communications and perform a few
efficient hash operations to setup a key with any other
sensor node in the network at a very high probability.
The majority of sensor nodes only need to store a
single key. Besides, it also provides high resilience
to node compromises. The theoretical analysis,
simulation studies, and experiments on TelosB sensor
motes also demonstrate the advantages of this key
establishment protocol in sensor networks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "59",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Arora:2012:ILM,
author = "Divya Arora and Najwa Aaraj and Anand Raghunathan and
Niraj K. Jha",
title = "{INVISIOS}: a Lightweight, Minimally Intrusive Secure
Execution Environment",
journal = j-TECS,
volume = "11",
number = "3",
pages = "60:1--60:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345772",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many information security attacks exploit
vulnerabilities in ``trusted'' and privileged software
executing on the system, such as the operating system
(OS). On the other hand, most security mechanisms
provide no immunity to security-critical user
applications if vulnerabilities are present in the
underlying OS. While technologies have been proposed
that facilitate isolation of security-critical
software, they require either significant computational
resources and are hence not applicable to many
resource-constrained embedded systems, or necessitate
extensive redesign of the underlying processors and
hardware. In this work, we propose INVISIOS: a
lightweight, minimally intrusive hardware-software
architecture to make the execution of security-critical
software invisible to the OS, and hence protected from
its vulnerabilities. The INVISIOS software architecture
encapsulates the security-critical software into a
self-contained software module. While this module is
part of the kernel and is run with kernel-level
privileges, its code, data, and execution are
transparent to and protected from the rest of the
kernel. The INVISIOS hardware architecture consists of
simple add-on hardware components that are responsible
for bootstrapping the secure core, ensuring that it is
exercised by applications in only permitted ways, and
enforcing the isolation of its code and data. We
implemented INVISIOS by enhancing a full-system
emulator and Linux to model the proposed software and
hardware enhancements, and applied it to protect a
commercial cryptographic library. Our experiments
demonstrate that INVISIOS is capable of facilitating
secure execution at very small overheads, making it
suitable for resource-constrained embedded systems and
systems-on-chip.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "60",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Izosimov:2012:SOF,
author = "Viacheslav Izosimov and Paul Pop and Petru Eles and
Zebo Peng",
title = "Scheduling and Optimization of Fault-Tolerant Embedded
Systems with Transparency\slash Performance
Trade-Offs",
journal = j-TECS,
volume = "11",
number = "3",
pages = "61:1--61:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345773",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we propose a strategy for the
synthesis of fault-tolerant schedules and for the
mapping of fault-tolerant applications. Our techniques
handle transparency/performance trade-offs and use the
fault-occurrence information to reduce the overhead due
to fault tolerance. Processes and messages are
statically scheduled, and we use process reexecution
for recovering from multiple transient faults. We
propose a fine-grained transparent recovery, where the
property of transparency can be selectively applied to
processes and messages. Transparency hides the recovery
actions in a selected part of the application so that
they do not affect the schedule of other processes and
messages. While leading to longer schedules,
transparent recovery has the advantage of both improved
debuggability and less memory needed to store the
fault-tolerant schedules.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "61",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yang:2012:PAA,
author = "Shengqi Yang and Pallav Gupta and Marilyn Wolf and
Dimitrios Serpanos and Vijaykrishnan Narayanan and Yuan
Xie",
title = "Power Analysis Attack Resistance Engineering by
Dynamic Voltage and Frequency Scaling",
journal = j-TECS,
volume = "11",
number = "3",
pages = "62:1--62:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345774",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article proposes a novel approach to cryptosystem
design to prevent power analysis attacks. Such attacks
infer program behavior by continuously monitoring the
power supply current going into the processor core.
They form an important class of security attacks. Our
approach is based on dynamic voltage and frequency
scaling (DVFS), which hides processor state to make it
harder for an attacker to gain access to a secure
system. Three designs are studied to test the efficacy
of the DVFS method against power analysis attacks. The
advanced realization of our cryptosystem is presented
which achieves enough high power and time trace
entropies to block various kinds of power analysis
attacks in the DES algorithm. We observed 27\% energy
reduction and 16\% time overhead in these algorithms.
Finally, DVFS hardness analysis is presented.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "62",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shokry:2012:HSS,
author = "Hesham Shokry and Hatem M. El-Boghdadi",
title = "On Heuristic Solutions to the Simple Offset Assignment
Problem in Address-Code Optimization",
journal = j-TECS,
volume = "11",
number = "3",
pages = "63:1--63:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345775",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The increasing demand for more functionality in
embedded systems applications nowadays requires
efficient generation of compact code for embedded DSP
processors. Because such processors have highly
irregular data-paths, compilers targeting those
processors are challenged with the automatic generation
of optimized code with competent quality comparable to
hand-crafted code. A major issue in code-generation is
to optimize the placement of program variables in ROM
relative to each other so as to reduce the overhead
instructions dedicated for address computations. Modern
DSP processors are typically shipped with a feature
called Address Generation Unit (AGU) that provides
efficient address-generation instructions for accessing
program variables. Compilers targeting those processors
are expected to exploit the AGU to optimize variables
assignment. This article focuses on one of the basic
offset-assignment problems; the Simple Offset
Assignment (SOA) problem, where the AGU has only one
Address Register and no Modify Registers. The notion of
Tie-Break Function, TBF, introduced by Leupers and
Marwedel [1996], has been used to guide the placement
of variables in memory. In this article, we introduce a
more effective form of the TBF; the Effective
Tie-Breaking Function, ETBF, and show that the ETBF is
better at guiding the variables placement process.
Underpinning ETBF is the fact that program variables
are placed in memory in sequence, with each variable
having only two neighbors. We applied our technique to
randomly generated graphs as well as to real-world code
from the OffsetStone testbench [2010]. In previous work
[Ali et al. 2008], our technique showed up to 7\%
reduction in overhead when applied to
randomly-generated problem instances. We report in this
article on a further experiment of our technique on
real-code from the OffsetStone testbench. Despite the
substantial improvement our technique has achieved when
applied to random problem instances, we found that it
shows slight overhead reduction when applied to
real-world instances in OffsetStone, which agrees with
similar existing experiments. We analyze these results
and show that the ETBF defaults to TBF.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "63",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Girodias:2012:IMO,
author = "Bruno Girodias and Luiza Gheorghe Iugan and Youcef
Bouchebaba and Gabriela Nicolescu and El Mostapha
Abouhamid and Michel Langevin and Pierre Paulin",
title = "Integrating Memory Optimization with Mapping
Algorithms for Multi-Processors System-on-Chip",
journal = j-TECS,
volume = "11",
number = "3",
pages = "64:1--64:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345776",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Due to their great ability to parallelize at a very
high integration level, Multi-Processors
Systems-on-Chip (MPSoCs) are good candidates for
systems and applications such as multimedia. Memory is
becoming a key player for significant improvements in
these applications (power, performance and area). The
large amount of data manipulated by these applications
requires high-capacity computing and memory. Lately,
new programming models have been introduced. This leads
to the need of new optimization and mapping techniques
suitable for embedded systems and their programming
models. This article presents novel approaches for
combining memory optimization with mapping of
data-driven applications while considering
anti-dependence conflicts. Two different approaches are
studied and integrated with existing mapping
algorithms. The first approach (based on heuristic
algorithms) keeps the graph transformation for memory
optimization stage from the mapping stage and enables
their combination in a design flow. The second approach
(based on evolutionary algorithms) combines these two
stages and integrates them in a unique stage. Some
significant improvements are obtained for memory gain,
communication load and physical links.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "64",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhong:2012:SNL,
author = "Ziguo Zhong and Tian He",
title = "Sensor Node Localization with Uncontrolled Events",
journal = j-TECS,
volume = "11",
number = "3",
pages = "65:1--65:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345777",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Event-driven localization has been proposed as a
low-cost solution for node positioning in wireless
sensor networks. In order to eliminate the costly
requirement for accurate event control in existing
methods, we present a practical design using
uncontrolled events. The main idea is to estimate both
event generation parameters and the location of sensor
nodes simultaneously, by processing node sequences that
can be easily obtained from event detections. Besides
the basic design, we proposed two enhancements to
further extract information embedded in node orderings
for two scenarios: (i) node density is high; and (ii)
abundant events are available. To demonstrate the
generality of our design, both straight-line scan and
circular wave propagation events are addressed in the
article, and we evaluated the design with extensive
simulation as well as a testbed implementation with 41
MICAz motes. Results show that with only randomly
generated events, our design can effectively localize
nodes with great flexibility while adding little extra
cost at the resource constrained sensor node side. In
addition, localization via uncontrolled events provides
a potential option of achieving node positioning
through long-term ambient events.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "65",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kumar:2012:ECI,
author = "Karthik Kumar and Yamini Nimmagadda and Yung-Hsiang
Lu",
title = "Energy Conservation for Image Retrieval on Mobile
Systems",
journal = j-TECS,
volume = "11",
number = "3",
pages = "66:1--66:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345779",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Mobile systems such as PDAs and cell phones play an
increasing role in handling visual contents such as
images. Thousands of images can be stored in a mobile
system with the advances in storage technology: this
creates the need for better organization and retrieval
of these images. Content Based Image Retrieval (CBIR)
is a method to retrieve images based on their visual
contents. In CBIR, images are compared by matching
their numerical representations called features; CBIR
is computation and memory intensive and consumes
significant amounts of energy. This article examines
energy conservation for CBIR on mobile systems. We
present three improvements to save energy while
performing the computation on the mobile system:
selective loading, adaptive loading, and caching
features in memory. Using these improvements adaptively
reduces the features to be loaded into memory for each
search. The reduction is achieved by estimating the
difficulty of the search. If the images in the
collection are dissimilar, fewer features are
sufficient; less computation is performed and energy
can be saved. We also consider the effect of
consecutive user queries and show how features can be
cached in memory to save energy. We implement a CBIR
algorithm on an HP iPAQ hw6945 and show that these
improvements can save energy and allow CBIR to scale up
to 50,000 images on a mobile system. We further
investigate if energy can be saved by migrating parts
of the computation to a server, called computation
offloading. We analyze the impact of the wireless
bandwidth, server speed, number of indexed images, and
the number of image queries on the energy consumption.
Using our scheme, CBIR can be made energy efficient
under all conditions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "66",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2012:IMR,
author = "Jaehwan John Lee and Xiang Xiao",
title = "Instant Multiunit Resource Hardware Deadlock Detection
Scheme for System-on-Chips",
journal = j-TECS,
volume = "11",
number = "3",
pages = "67:1--67:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345780",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, a brand new method of determining
deadlock is presented. Most previous deadlock detection
methods are algorithmic in the sense that they usually
leverage some forms of Resource Allocation Graph (RAG)
representations and then algorithms are devised to
manipulate such representations in order to detect
deadlock using information contained in the graph.
Different from all previous methods, the proposed
method actualizes the RAG with a digital circuit and
uses it as a token-transmitting network. By supplying
special input signals (tokens) to the network and
observing the output tokens from the network, it is
easier to identify which process nodes are reachable
from each resource node in the graph. Using the
reachability information, deadlock can be detected
immediately. The time required to obtain the
reachability information is determined by how fast the
combinational circuit operates. Compared with previous
algorithmic methods, the proposed deadlock detection
can be deemed instant. We show that the proposed method
is an order of magnitude faster than the previous
fastest hardware mechanism and several orders of
magnitude faster than traditional software-based
algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zappi:2012:NLP,
author = "Piero Zappi and Daniel Roggen and Elisabetta Farella
and Gerhard Tr{\"o}ster and Luca Benini",
title = "Network-Level Power-Performance Trade-Off in Wearable
Activity Recognition: a Dynamic Sensor Selection
Approach",
journal = j-TECS,
volume = "11",
number = "3",
pages = "68:1--68:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345781",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Wearable gesture recognition enables context aware
applications and unobtrusive HCI. It is realized by
applying machine learning techniques to data from
on-body sensor nodes. We present a gesture recognition
system minimizing power while maintaining a run-time
application defined performance target through dynamic
sensor selection. Compared to the non managed approach
optimized for recognition accuracy (95\% accuracy), our
technique can extend network lifetime by 4 times with
accuracy {$>$90}\% and by 9 times with accuracy
{$>$70}\%. We characterize the approach and outline its
applicability to other scenarios.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "68",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ambrose:2012:RII,
author = "Jude A. Ambrose and Roshan G. Ragel and Sri
Parameswaran",
title = "Randomized Instruction Injection to Counter Power
Analysis Attacks",
journal = j-TECS,
volume = "11",
number = "3",
pages = "69:1--69:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2345770.2345782",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 22 10:44:19 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Side-channel attacks in general and power analysis
attacks in particular are becoming a major security
concern in embedded systems. Countermeasures proposed
against power analysis attacks are data and table
masking, current flattening, dummy instruction
insertion and bit-flips balancing. All these techniques
are either susceptible to multi-order power analysis
attack, not sufficiently generic to cover all
encryption algorithms, or burden the system with high
area, run-time or energy cost. In this article, we
propose a randomized instruction injection technique
(RIJID) that overcomes the pitfalls of previous
countermeasures. RIJID scrambles the power profile of a
cryptographic application by injecting random
instructions at random points of execution and
therefore protects the system against power analysis
attacks. Two different ways of triggering the
instruction injection are also presented: (1)
softRIJID, a hardware/software approach, where special
instructions are used in the code for triggering the
injection at runtime; and (2) autoRIJID, a hardware
approach, where the code injection is triggered by the
processor itself via detecting signatures of encryption
routines at runtime. A novel signature detection
technique is also introduced for identifying encryption
routines within application programs at runtime.
Further, a simple obfuscation metric (RIJIDindex) based
on cross-correlation that measures the scrambling
provided by any code injection technique is introduced,
which coarsely indicates the level of scrambling
achieved. Our processor models cost 1.9\% additional
area in the hardware/software approach and 1.2\% in the
hardware approach for a RISC based processor, and cost
on average 29.8\% in runtime and 27.1\% in energy for
the former and 25.0\% in runtime and 28.5\% in energy
for the latter, for industry standard cryptographic
applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "69",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pimentel:2012:ISS,
author = "Andy D. Pimentel and Naehyuck Chang and Mladen
Berekovic",
title = "Introduction to special section {ESTIMedia'09}",
journal = j-TECS,
volume = "11",
number = "4",
pages = "70:1--70:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362337",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "70",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Paterna:2012:VTW,
author = "Francesco Paterna and Andrea Acquaviva and Francesco
Papariello and Giuseppe Desoli and Luca Benini",
title = "Variability-tolerant workload allocation for {MPSoC}
energy minimization under real-time constraints",
journal = j-TECS,
volume = "11",
number = "4",
pages = "71:1--71:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362338",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Sub-50nm CMOS technologies are affected by significant
variability, which causes power and performance
variations among nominally similar cores in MPSoC
platforms. This undesired heterogeneity threatens
execution predictability and energy efficiency. We
propose two techniques to allocate sets of
barrier-synchronized tasks. The first technique models
allocation as an ILP and achieves optimal results, but
requires an offline solver. The second technique adopts
a two-stage heuristic approach, and it can be adapted
to work online. We tested our approach on the virtual
prototype of a next-generation industrial multicore
platform. Experimental results demonstrate that our
approach minimizes deadline violations while increasing
energy efficiency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "71",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tsutsui:2012:HTP,
author = "Hiroshi Tsutsui and Koichi Hattori and Hiroyuki Ochi
and Yukihiro Nakamura",
title = "A high-throughput pipelined parallel architecture for
{JPEG XR} encoding",
journal = j-TECS,
volume = "11",
number = "4",
pages = "72:1--72:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362339",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "JPEG XR is an emerging image coding standard, based on
HD Photo developed by Microsoft Corporation. It
supports high compression performance twice as high as
the de facto image coding system, namely, JPEG, and
also has an advantage over JPEG 2000 in terms of
computational cost. JPEG XR is expected to be
widespread for many devices including embedded systems
in the near future. In this article, we propose a novel
architecture for JPEG XR encoding. In previous
architectures, entropy coding was the throughput
bottleneck because it was implemented as a sequential
algorithm to handle data with dependency. We found that
there is no dependency in intra-macroblock data, and we
could safely pipeline all the encoding processes
including the entropy coding. In addition, each module
of our architecture, which can be regarded as a
pipeline stage, can be parallelized. As a result, our
architecture can achieve 12.8 pixel/cycle at its
maximum. To demonstrate our architecture, we designed
three versions of our architecture with different
degrees of parallelism of one, two, and four. Our
four-way parallel architecture achieves 579 Mpixel/sec
at 181MHz clock frequency for grayscale images.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "72",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2012:XFM,
author = "Minyoung Kim and Mark-Oliver Stehr and Carolyn Talcott
and Nikil Dutt and Nalini Venkatasubramanian",
title = "{xTune}: a formal methodology for cross-layer tuning
of mobile embedded systems",
journal = j-TECS,
volume = "11",
number = "4",
pages = "73:1--73:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362340",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Resource-limited mobile embedded systems can benefit
greatly from dynamic adaptation of system parameters.
We propose a novel approach that employs iterative
tuning using lightweight formal verification at runtime
with feedback for dynamic adaptation. One objective of
this approach is to enable trade-off analysis across
multiple layers (e.g., application, middleware, OS) and
predict the possible property violations as the system
evolves dynamically over time. Specifically, an
executable formal specification is developed for each
layer of the mobile system under consideration. The
formal specification is then analyzed using statistical
property checking and statistical quantitative
analysis, to determine the impact of various resource
management policies for achieving desired timing/QoS
properties. Integration of formal analysis with dynamic
behavior from system execution results in a feedback
loop that enables model refinement and further
optimization of policies and parameters. We demonstrate
the applicability of this approach to the adaptive
provisioning of resource-limited distributed real-time
systems using a mobile multimedia case study.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "73",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dick:2012:ISS,
author = "Robert Dick and Li Shang and Nikil Dutt",
title = "Introduction to special section {SCPS'09}",
journal = j-TECS,
volume = "11",
number = "4",
pages = "74:1--74:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362341",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Koutsoukos:2012:PAM,
author = "Xenofon Koutsoukos and Nicholas Kottenstette and
Joseph Hall and Emeka Eyisi and Heath Leblanc and
Joseph Porter and Janos Sztipanovits",
title = "A passivity approach for model-based compositional
design of networked control systems",
journal = j-TECS,
volume = "11",
number = "4",
pages = "75:1--75:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362342",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The integration of physical systems through computing
and networking has become pervasive, a trend now known
as cyber-physical systems (CPS). Functionality in CPS
emerges from the interaction of networked computational
and physical objects. System design and integration are
particularly challenging because fundamentally
different physical and computational design concerns
intersect. The impact of these interactions is the loss
of compositionality which creates tremendous
challenges. The key idea in this article is to use
passivity for decoupling the control design of
networked systems from uncertainties such as time
delays and packet loss, thus providing a fundamental
simplification strategy that limits the complexity of
interactions. The main contribution is the application
of the approach to an experimental case study of a
networked multi-robot system. We present a networked
control architecture that ensures the overall system
remains stable in spite of implementation uncertainties
such as network delays and data dropouts, focusing on
the technical details required for the implementation.
We describe a prototype domain-specific modeling
language and automated code generation tools for the
design of networked control systems on top of passivity
that facilitate effective system configuration,
deployment, and testing. Finally, we present
experimental evaluation results that show decoupling of
interlayer interactions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "75",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shin:2012:CTC,
author = "Donghwa Shin and Jaehyun Park and Younghyun Kim and
Jaeam Seo and Naehyuck Chang",
title = "Control-theoretic cyber-physical system modeling and
synthesis: a case study of an active direct methanol
fuel cell",
journal = j-TECS,
volume = "11",
number = "4",
pages = "76:1--76:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362343",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A joint optimization of the physical system and the
cyber world is one of the key problems in the design of
a cyber-physical system (CPS). The major mechanical
forces and/or chemical reactions in a plant are
commonly modified by actuators in the balance-of-plant
(BOP) system. More powerful actuators require more
power, but generally increase the response of the
physical system powered by the electrical energy
generated by the physical system. To maximize the
overall output of a power generating plant therefore
requires joint optimization of the physical system and
the cyber world, and this is a key factor in the design
of a CPS. We introduce a systematic approach to the
modeling and synthesis of a CPS that emphasize joint
power optimization, using an active direct methanol
fuel cell (DMFC) as a case study. Active DMFC systems
are superior to passive DMFCs in terms of fuel
efficiency thanks to their BOP system, which includes
pumps, air blowers, and fans. However, designing a
small-scale active DMFC with the best overall system
efficiency requires the BOP system to be jointly
optimized with the DMFC stack operation, because the
BOP components are powered by the stack. Our approach
to this synthesis problem involves (i) BOP system
characterization, (ii) integrated DMFC system modeling,
(iii) configuring a system for the maximum net power
output through design space exploration, (iv) synthesis
of feedback control tasks, and (v) implementation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "76",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Malik:2012:SLA,
author = "Avinash Malik and Zoran Salcic and Christopher Chong
and Salman Javed",
title = "System-level approach to the design of a smart
distributed surveillance system using {SystemJ}",
journal = j-TECS,
volume = "11",
number = "4",
pages = "77:1--77:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362344",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Distributed surveillance systems represent a class of
sensor networks used for object location and tracking,
road traffic monitoring, security, and other purposes.
They are very complex to describe, design, and run.
Because of their sensitivity, they need to be carefully
designed and validated. We present a system-level
approach to modeling and designing such systems using a
new system-level programming language, SystemJ, which
enables designers to describe computational and
communication parts of such applications in a highly
abstract manner. The designed system can be modeled and
validated even before deployment and in that way
contribute to the overall reliability and
trustworthiness of such systems. As an additional tool,
the design environment for specification of the
surveillance system topology, physical and
communication properties, selected sensors and their
interconnectivity with the computing resources was
developed. This tool enables easy composition of
multiple sensors and their respective controllers,
capturing changes of configuration of the system and
underlying communication, and automatic generation of
the formal description of the surveillance system. This
description is then used for the generation of
executable code and/or the templates for detailed
SystemJ application-specific code, as well as for
generation of the operator GUI in a surveillance
system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "77",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yoong:2012:ICC,
author = "Li Hsien Yoong and Partha S. Roop and Zoran Salcic",
title = "Implementing constrained cyber-physical systems with
{IEC 61499}",
journal = j-TECS,
volume = "11",
number = "4",
pages = "78:1--78:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362345",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cyber-physical systems (CPS) are integrations of
computation and control with sensing and actuation of
the physical environment. Typically, such systems
consist of embedded computers that monitor and control
physical processes in a feedback loop. While modern
electronic systems are increasingly characterized as
CPS, their design and synthesis still rely on
traditional methods, which lack systematic and
automated techniques for accomplishment. Recently, IEC
61499 has been proposed as a standard for designing
industrial process-control and measurement systems. It
prescribes a component-based approach for developing
industrial automation software using function blocks.
Executable code can then be automatically generated and
simulated from these function blocks. This bodes well
for designers of CPS, who are more likely to be experts
in specific industrial domains, rather than in computer
science. The intuitive graphical nature and automatic
code synthesis of IEC 61499 programs will alleviate the
programming burden of industrial engineers, while
ensuring more reliable software. While software
synthesis from IEC 61499 programs is not new, the
generation of efficient code from them has been
wanting. This has made it difficult for function blocks
to be used in software development for
resource-constrained embedded controllers commonly
employed in CPS. To address this, we present an
approach that can generate very efficient code from
function block descriptions. Experimental results from
a benchmark suite show that our approach produces
substantially faster and smaller code compared to
existing techniques.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "78",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Subramanian:2012:GOP,
author = "Varun Subramanian and Michael Gilberti and Alex Doboli
and Dan Pescaru",
title = "A goal-oriented programming framework for grid sensor
networks with reconfigurable embedded nodes",
journal = j-TECS,
volume = "11",
number = "4",
pages = "79:1--79:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362346",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cyber-physical systems (CPS) are large, distributed
embedded systems integrated with various sensors and
actuators. CPS are rapidly emerging as an important
computing paradigm in many modern applications.
Developing CPS applications is currently challenging
due to the sheer complexity of the related
functionality as well as the broad set of constraints
and unknowns that must be tackled during operation.
This article presents a novel high-level programming
model and the supporting optimization and middleware
routines for executing applications on
physically-distributed networks of reconfigurable
embedded systems. The proposed model describes the
optimization goals, sensing inputs, actuation outputs,
events, and constraints of an application, while
leaving to the compiler and execution environment the
task of optimally implementing the derived
functionality. Experimental results discuss the
additional performance optimizations enabled by the
proposed model, and the timing and power consumption of
the middleware routines, and present a temperature
monitoring application implemented on a network of
reconfigurable, embedded processors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tan:2012:ACF,
author = "Rui Tan and Guoliang Xing and Xue Liu and Jianguo Yao
and Zhaohui Yuan",
title = "Adaptive calibration for fusion-based cyber-physical
systems",
journal = j-TECS,
volume = "11",
number = "4",
pages = "80:1--80:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362347",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many Cyber-Physical Systems (CPS) are composed of
low-cost devices that are deeply integrated with
physical environments. As a result, the performance of
a CPS system is inevitably undermined by various
physical uncertainties, which include stochastic
noises, hardware biases, unpredictable environment
changes, and dynamics of the physical process of
interest. Traditional solutions to these issues (e.g.,
device calibration and collaborative signal processing)
work in an open-loop fashion and hence often fail to
adapt to the uncertainties after system deployment. In
this article, we propose an adaptive system-level
calibration approach for a class of CPS systems whose
primary objective is to detect events or targets of
interest. Through collaborative data fusion, our
calibration approach features a feedback control loop
that exploits system heterogeneity to mitigate the
impact of aforementioned uncertainties on the system
performance. In contrast to existing heuristic-based
solutions, our control-theoretical calibration
algorithm can ensure provable system stability and
convergence. We also develop a routing algorithm for
fusion-based multihop CPS systems that is robust to
communication unreliability and delay. Our approach is
evaluated by both experiments on a testbed of Tmotes as
well as extensive simulations based on data traces
gathered from a real vehicle detection experiment. The
results demonstrate that our calibration algorithm
enables a CPS system to maintain the optimal sensing
performance in the presence of various system and
environmental dynamics.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "80",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nam:2012:MTI,
  author = {Min-Young Nam and Kyungtae Kang and Rodolfo Pellizzoni
    and Kyung-Joon Park and Jung-Eun Kim and Lui Sha},
  title = {Modeling towards incremental early analyzability of
    networked avionics systems using virtual integration},
  journal = j-TECS,
  volume = {11},
  number = {4},
  pages = {81:1--81:??},
  month = dec,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2362336.2362348},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {With the advance of hardware technology, more features
    are incrementally added to already existing networked
    systems. Avionics has a stronger tendency to use
    preexisting applications due to its complexity and
    scale. As resource sharing becomes intense among the
    network and the computing modules, it has become a
    difficult task for the system designer to make
    confident architectural decisions even for incremental
    changes. Providing a tailored environment to model and
    analyze incremental changes requires a combination of
    software tools and hardware support. We have built a
    virtual integration tool called ASIIST which can
    provide a worst-case end-to-end latency of data that is
    sent through a network and the internal bus
    architecture of the end-systems. Also, we have devised
    a new real-time switching algorithm which guarantees
    the worst-case network delay of preexisting network
    traffic under feasible conditions. With the real-time
    switch support, ASIIST can provide an early modularized
    analysis of the end-to-end latency to make
    architectural design choices and incremental changes
    easier for the user.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {81},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Pajic:2012:RAE,
  author = {Miroslav Pajic and Alexander Chernoguzov and Rahul
    Mangharam},
  title = {Robust architectures for embedded wireless network
    control and actuation},
  journal = j-TECS,
  volume = {11},
  number = {4},
  pages = {82:1--82:??},
  month = dec,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2362336.2362349},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Networked cyber-physical systems are fundamentally
    constrained by the tight coupling and closed-loop
    control of physical processes. To address actuation in
    such closed-loop wireless control systems there is a
    strong need to rethink the communication architectures
    and protocols for reliability, coordination, and
    control. We introduce the Embedded Virtual Machine
    (EVM), a programming abstraction where controller tasks
    with their control and timing properties are maintained
    across physical node boundaries and functionality is
    capable of migrating to the most competent set of
    physical controllers. In the context of process and
    discrete control, an EVM is the distributed runtime
    system that dynamically selects primary-backup sets of
    controllers given spatial and temporal constraints of
    the underlying wireless network. EVM-based algorithms
    allow network control algorithms to operate seamlessly
    over less reliable wireless networks with topological
    changes. They introduce new capabilities such as
    predictable outcomes during sensor/actuator failure,
    adaptation to mode changes, and runtime optimization of
    resource consumption. An automated design flow from
    Simulink to platform-independent domain-specific
    languages, and subsequently, to platform-dependent code
    generation is presented. Through case studies in
    discrete and process control we demonstrate the
    capabilities of EVM-based wireless network control
    systems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {82},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Lakshmanan:2012:OPM,
  author = {Karthik Lakshmanan and Dionisio {De Niz} and
    Ragunathan (Raj) Rajkumar and Gabriel Moreno},
  title = {Overload provisioning in mixed-criticality
    cyber-physical systems},
  journal = j-TECS,
  volume = {11},
  number = {4},
  pages = {83:1--83:??},
  month = dec,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2362336.2362350},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Cyber-physical systems are an emerging class of
    applications that require tightly coupled interaction
    between the computational and physical worlds. These
    systems are typically realized using sensor/actuator
    interfaces connected with processing backbones. Safety
    is a primary concern in cyber-physical systems since
    the actuators directly influence the physical world.
    However, unexpected or unusual conditions in the
    physical world can manifest themselves as increased
    workload demands being offered to the computational
    infrastructure of a cyber-physical system. Guaranteeing
    system safety under overload conditions is therefore a
    prime concern in developing and deploying
    cyber-physical systems. In this work, we study this
    problem in the context of a radar surveillance system,
    where tasks have different levels of criticality or
    influence on system safety. In the face of overloads,
    we observe that the desirable property in such systems
    is that the more critical tasks continue to meet their
    timing requirements. We capture this mixed-criticality
    overload requirement using a formal overload-tolerance
    metric called ductility. Using this overload-tolerance
    metric, we first develop our solution in the context of
    uniprocessor systems, where we show that Zero-Slack
    scheduling (ZS) algorithms can be used to improve the
    overload behavior in mixed-criticality cyber-physical
    systems compared to existing fixed-priority scheduling
    algorithms like Rate-Monotonic Scheduling (RMS) and
    Criticality-As-Priority-Assignment (CAPA). Leveraging
    these results, we then develop a criticality-aware task
    allocation algorithm called Compress-on-Overload
    Packing (COP) for dealing with multiprocessor
    cyber-physical systems. Evaluation results show that
    COP achieves up to five times better ductility than
    traditional load balancing bin-packing algorithms like
    Worst-Fit Decreasing (WFD). Finally, we apply ZS and
    COP to the radar surveillance system to demonstrate the
    resulting improvement in system overload behavior. Our
    implementation of the Zero-Slack scheduler is available
    as a part of the Linux/RK project, which provides
    resource kernel extensions for Linux.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {83},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Woehrle:2012:CTC,
  author = {Matthias Woehrle and Kai Lampka and Lothar Thiele},
  title = {Conformance testing for cyber-physical systems},
  journal = j-TECS,
  volume = {11},
  number = {4},
  pages = {84:1--84:??},
  month = dec,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2362336.2362351},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Cyber-Physical Systems (CPS) require a high degree of
    reliability and robustness. Hence it is important to
    assert their correctness with respect to
    extra-functional properties, like power consumption,
    temperature, etc. In turn the physical quantities may
    be exploited for assessing system implementations. This
    article develops a methodology for utilizing
    measurements of physical quantities for testing the
    conformance of a running CPS with respect to a formal
    description of its required behavior allowing to
    uncover defects. We present foundations and
    implementations of this approach and demonstrate its
    usefulness by conformance testing power measurements of
    a wireless sensor node with a formal model of its power
    consumption.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {84},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Zhu:2012:OTA,
  author = {Qi Zhu and Haibo Zeng and Wei Zheng and Marco {Di
    Natale} and Alberto Sangiovanni-Vincentelli},
  title = {Optimization of task allocation and priority
    assignment in hard real-time distributed systems},
  journal = j-TECS,
  volume = {11},
  number = {4},
  pages = {85:1--85:??},
  month = dec,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2362336.2362352},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The complexity and physical distribution of modern
    active safety, chassis, and powertrain automotive
    applications requires the use of distributed
    architectures. Complex functions designed as networks
    of function blocks exchanging signal information are
    deployed onto the physical HW and implemented in a SW
    architecture consisting of a set of tasks and messages.
    The typical configuration features priority-based
    scheduling of tasks and messages and imposes end-to-end
    deadlines. In this work, we present and compare
    formulations and procedures for the optimization of the
    task allocation, the signal to message mapping, and the
    assignment of priorities to tasks and messages in order
    to meet end-to-end deadline constraints and minimize
    latencies. Our formulations leverage worst-case
    response time analysis within a mixed integer linear
    optimization framework and are compared for performance
    against a simulated annealing implementation. The
    methods are applied for evaluation to an automotive
    case study of complexity comparable to industrial
    design problems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {85},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Cucinotta:2012:ART,
  author = {Tommaso Cucinotta and Fabio Checconi and Luca Abeni
    and Luigi Palopoli},
  title = {Adaptive real-time scheduling for legacy multimedia
    applications},
  journal = j-TECS,
  volume = {11},
  number = {4},
  pages = {86:1--86:??},
  month = dec,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2362336.2362353},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Multimedia applications are often executed on standard
    personal computers. The absence of established
    standards has hindered the adoption of real-time
    scheduling solutions in this class of applications.
    Developers have adopted a wide range of heuristic
    approaches to achieve an acceptable timing behavior but
    the result is often unreliable. We propose a mechanism
    to extend the benefits of real-time scheduling to
    legacy applications based on the combination of two
    techniques: (1) a real-time monitor that observes and
    infers the activation period of the application, and
    (2) a feedback mechanism that adapts the scheduling
    parameters to improve its real-time performance.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {86},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Scharfenberger:2012:RIP,
author = "Christian Scharfenberger and Samarjit Chakraborty
and Georg F{\"a}rber",
title = "Robust image processing for an omnidirectional
camera-based smart car door",
journal = j-TECS,
volume = "11",
number = "4",
pages = "87:1--87:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2362354",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Over the last decade, there has been an increasing
emphasis on driver-assistance systems for the
automotive domain. In this article, we report our work
on designing a camera-based surveillance system
embedded in a ``smart'' car door. Such a camera is used
to monitor the ambient environment outside the car, for
instance, the presence of obstacles such as approaching
cars or cyclists who might collide with the car door if
opened---and automatically control the car door
operations. This is an enhancement to the currently
available side-view mirrors that the driver/passenger
checks before opening the car door. The focus of this
article is on fast and robust image processing
algorithms specifically targeting such a smart car door
system. The requirement is to quickly detect traffic
objects of interest from grayscale images captured by
omnidirectional cameras. While known algorithms for
object extraction from the image processing literature
rely on color information and are sensitive to shadows
and illumination changes, our proposed algorithms are
highly robust, can operate on grayscale images (color
images are not available in our setup), and output
results in real time. We present a number of
experimental results based on image sequences captured
from real-life traffic scenarios to demonstrate the
applicability of our algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gordon-Ross:2012:CCR,
author = "Ann Gordon-Ross and Frank Vahid and Nikil Dutt",
title = "Combining code reordering and cache configuration",
journal = j-TECS,
volume = "11",
number = "4",
pages = "88:1--88:??",
month = dec,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2362336.2399177",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 10 17:38:16 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The instruction cache is a popular optimization target
due to the cache's high impact on system performance
and power and because of the cache's predictable
temporal and spatial locality. This article is an
in-depth study on the interaction of code reordering (a
long-known technique) and cache configuration (a
relatively new technique). Experimental results show
that code reordering coupled with cache configuration
reveals additional energy savings as high as 10--15\%
for several benchmarks with reduced cache area as high
as 48\%. To exploit these additional benefits, we
architect and evaluate several design exploration
heuristics for combining these two methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Baiocchi:2012:EDB,
  author = {Jos{\'e} A. Baiocchi and Bruce R. Childers and Jack W.
    Davidson and Jason D. Hiser},
  title = {Enabling dynamic binary translation in embedded
    systems with scratchpad memory},
  journal = j-TECS,
  volume = {11},
  number = {4},
  pages = {89:1--89:??},
  month = dec,
  year = {2012},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2362336.2399178},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Important challenges for embedded systems can be
    addressed by dynamic binary translation. A dynamic
    binary translator stores translated instructions in a
    software-managed code cache, which is usually large to
    minimize overhead. This article shows how to use a
    small scratchpad memory for the code cache. A small
    code cache may require frequent code evictions and
    retranslation, which degrade performance. We propose
    techniques to reduce the number of instructions
    inserted by the translator and a way to form fragments
    that minimizes translated code size. With our
    techniques, a much smaller code cache can hold a
    program's translated code working set.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {89},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Khalgui:2013:ISI,
  author = {Mohamed Khalgui and Zhiwu Li},
  title = {Introduction to the {Special Issue on Modeling and
    Verification of Discrete Event Systems}},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {1:1--1:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406337},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {1},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Wang:2013:DLE,
author = "Shouguang Wang and Chengying Wang and Yanping Yu",
title = "Design of Liveness-Enforcing Supervisors for {S$^3$PR}
Based on Complementary Places",
journal = j-TECS,
volume = "12",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406338",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, an algorithm is proposed to design
liveness-enforcing supervisors for systems of simple
sequential processes with resources (S$^3$PR) based on
complementary places. Firstly, a mixed integer
programming (MIP) based deadlock detection method is
used to find unmarked strict minimal siphons from an
infinite-capacity net. Next, the finite-capacity net,
in which liveness can be enforced, is obtained by
adding capacity function to the infinite-capacity net.
Finally, complementary-place transformation is used to
transform the finite-capacity net into an
infinite-capacity net. This article focuses on adding a
complementary place to each operation place that is
related to unmarked siphons, deals with the deadlock
problem from a new view point, and hence advances the
deadlock control theory. Compared with the existing
methods, the new policy is easier to implement for real
industrial systems. More importantly, design of a
complementary-place supervisor is very easy. Finally,
in some cases, the new policy can obtain a structurally
simpler supervisor with more permissive behavior than
the existing methods do. A flexible manufacturing
systems (FMS) example is used to compare the proposed
policy with some other methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2013:CMS,
  author = {Yufeng Chen and Gaiyun Liu},
  title = {Computation of Minimal Siphons in {Petri} Nets by
    Using Binary Decision Diagrams},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {3:1--3:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406339},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Siphons play an important role in the development of
    deadlock control methods by using Petri nets. The
    number of siphons increases exponentially with respect
    to the size of a Petri net. This article presents a
    symbolic approach to the computation of minimal siphons
    in Petri nets by using binary decision diagrams (BDD).
    The siphons of a Petri net can be found via a set of
    logic conditions. The logic conditions are symbolically
    modeled by using Boolean algebras. The operations of
    Boolean algebras are implemented by BDD that are
    capable of representing large sets of siphons with
    small shared data structures. The proposed method first
    uses BDD to compute all siphons of a Petri net and then
    a binary relation is designed to extract all minimal
    siphons. Finally, by using a number of examples, the
    efficiency of the proposed method is verified through
    different-sized problems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {3},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ding:2013:DAV,
  author = {Zhijun Ding and Changjun Jiang and Mengchu Zhou},
  title = {Design, Analysis and Verification of Real-Time Systems
    Based on Time {Petri} Net Refinement},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {4:1--4:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406340},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {A type of refinement operations of time Petri nets is
    presented for design, analysis and verification of
    complex real-time systems. First, the behavior
    preservation is studied under time constraints in a
    refinement operation, and a sufficient condition for
    behavior preservation is obtained. Then, the property
    preservation is considered, and the results indicate
    that if the refinement operation of time Petri nets
    satisfies behavior preservation, it can also preserve
    properties such as boundedness and liveness. Finally,
    based on the behavior preservation, a reachability
    decidability algorithm of a refined time Petri net is
    designed using the reachability trees of its original
    net and subnet. The research results are illustrated by
    an example of designing, analyzing and verifying a
    real-time manufacturing system.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {4},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{McInnes:2013:MAT,
  author = {Allan I. McInnes},
  title = {Modeling and Analysis of {TinyOS} Sensor Node
    Firmware: a {CSP} Approach},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {5:1--5:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406341},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Wireless sensor networks are an increasingly popular
    application area for embedded systems. Individual
    sensor nodes within a network are typically
    resource-constrained, event-driven, and require a high
    degree of concurrency. This combination of requirements
    motivated the development of the widely used TinyOS
    sensor node operating system. The TinyOS concurrency
    model is a lightweight nonpreemptive system designed to
    suit the needs of typical sensor network applications.
    Although the TinyOS concurrency model is easier to
    reason about than preemptive threads, it can still give
    rise to undesirable behavior due to unexpected
    interleavings of related tasks, or unanticipated
    preemption by interrupt handlers. To aid TinyOS
    developers in understanding the behavior of their
    programs we have developed a technique for using the
    process algebra Communicating Sequential Processes
    (CSP) to model the interactions between TinyOS
    components, and between an application and the TinyOS
    scheduling and preemption mechanisms. Analysis of the
    resulting models can help TinyOS developers to discover
    and diagnose concurrency-related errors in their
    designs that might otherwise go undetected until after
    the application has been widely deployed. Such analysis
    is particularly valuable for the TinyOS components that
    are used as building blocks for a large number of other
    applications, since a subtle or sporadic error in a
    widely deployed building block component could be
    extremely costly to repair.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {5},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Godary-Dejean:2013:FVD,
  author = {Karen Godary-Dejean and David Andreu},
  title = {Formal Validation of a Deterministic {MAC} Protocol},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {6:1--6:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406342},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article deals with the formal validation of
    STIMAP, a medium access protocol that has been designed
    to meet the specific requirements of an implantable
    network-based neuroprosthesis. This article presents
    the modeling and the validation of its medium access,
    using model checking on Time Petri Nets. Doing so, we
    show that existent formal methods and tools are not
    perfectly suitable for the validation of real systems,
    especially when some hardware parameters have to be
    considered. This article then presents how these
    difficulties have been managed during the modeling and
    verification phases, and gives the validation results
    for STIMAP, providing constraints to respect.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {6},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Boucheneb:2013:RIS,
  author = {Hanifa Boucheneb and Kamel Barkaoui},
  title = {Reducing Interleaving Semantics Redundancy in
    Reachability Analysis of Time {Petri} Nets},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {7:1--7:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406343},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The main problem of verification techniques based on
    exploration of (reachable) state space is the state
    explosion problem. In timed models, abstract states
    reached by different interleavings of the same set of
    transitions are, in general, different and their union
    is not necessarily an abstract state. To attenuate this
    state explosion, it would be interesting to reduce the
    redundancy caused by the interleaving semantics by
    agglomerating all these abstract states whenever their
    union is an abstract state. This article considers the
    time Petri net model and establishes some sufficient
    conditions that ensure that this union is an abstract
    state. In addition, it proposes a procedure to compute
    this union without computing beforehand intermediate
    abstract states. Finally, it shows how to use this
    result to improve the reachability analysis.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {7},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Zhang:2013:SCE,
  author = {Zhiming Zhang and Weimin Wu},
  title = {Sequence Control of Essential Siphons for Deadlock
    Prevention in {Petri} Nets},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {8:1--8:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406344},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Deadlock prevention is crucial to the modeling of
    flexible manufacturing systems. In the Petri net
    framework, deadlock prevention is often addressed by
    siphon-based control (SC) policies. Recent research
    results show that SC methods can avoid full siphon
    enumeration by using mixed integer programming (MIP) to
    greatly increase the computational efficiency so that
    it can be applied in large systems in computable time.
    Besides, maximally permissive control solutions can be
    obtained by means of iterative siphon control (ISC)
    approaches and MIP. Then the remaining problems are
    redundancy and MIP iterations. Redundant controllers
    make the closed-loop system more complicated and each
    MIP iteration increases the total computational time.
    This article proposes a revised ISC deadlock prevention
    policy which can achieve better results than the other
    reported methods in terms of redundancy and MIP
    iterations while maintaining the maximal
    permissiveness. Several benchmark examples are provided
    to illustrate the proposed approach and to be compared
    with the other reported methods.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {8},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ahmed:2013:HGA,
  author = {Zakir Hussain Ahmed},
  title = {A Hybrid Genetic Algorithm for the {Bottleneck
    Traveling Salesman Problem}},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {9:1--9:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406345},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The bottleneck traveling salesman problem is to find a
    Hamiltonian circuit that minimizes the largest cost of
    any of its arcs in a graph. A simple genetic algorithm
    (GA) using sequential constructive crossover has been
    developed to obtain heuristic solution to the problem.
    The hybrid GA incorporates 2-opt search, another
    proposed local search and immigration to the simple GA
    for obtaining better solution. The efficiency of our
    hybrid GA to the problem against two existing heuristic
    algorithms has been examined for some symmetric TSPLIB
    instances. The comparative study shows the
    effectiveness of our hybrid algorithm. Finally, we
    present solutions to the problem for asymmetric TSPLIB
    instances.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {9},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Wu:2013:OSL,
  author = {Naiqi Wu and Mengchu Zhou and Gang Hu},
  title = {One-Step Look-Ahead Maximally Permissive Deadlock
    Control of {AMS} by Using {Petri} Nets},
  journal = j-TECS,
  volume = {12},
  number = {1},
  pages = {10:1--10:??},
  month = jan,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2406336.2406346},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Jan 25 17:38:43 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {It is desired that a deadlock control policy for
    automated manufacturing systems (AMS) is maximally
    permissive. However, its tractability issue remains
    open, and this work addresses this important issue. It
    models AMS with a resource-oriented Petri net (ROPN)
    and presents a necessary and sufficient condition under
    which there exists a one-step look-ahead maximally
    permissive control policy for deadlock avoidance in
    AMS. It further identifies some conditions under which
    a one-step look-ahead maximally permissive deadlock
    control policy exists for a single-capacity system. The
    conditions can be conveniently examined by using the
    developed ROPN model.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {10},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Huang:2013:TBD,
author = "Yi-Sheng Huang and Yen-Liang Pan and Pin-June Su",
title = "Transition-Based Deadlock Detection and Recovery
Policy for {FMSs} Using Graph Technique",
journal = j-TECS,
volume = "12",
number = "1",
pages = "11:1--11:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406347",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A transition-controlled deadlock detection and
recovery prevention policy is presented for a subclass
of Petri nets used to model flexible manufacturing
systems. The subclass is called systems of simple
sequential processes with resources (S$^3$PR). The
proposed policy is different from the standard deadlock
prevention policies. Instead of adding control places,
this policy adds a controlled transition to solve a
group of deadlocked markings that have the same
graph-based property. Finally, the results of our study
indicate that the proposed policy appears to be more
permissive than those existing ones that add control
places.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nazemzadeh:2013:FMD,
author = "Payam Nazemzadeh and Abbas Dideban and Meisam
Zareiee",
title = "Fault Modeling in Discrete Event Systems Using {Petri}
Nets",
journal = j-TECS,
volume = "12",
number = "1",
pages = "12:1--12:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406348",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article a model-based controller
reconfiguration method for fault-tolerant control of
discrete event systems has been introduced. In this
method, we model the fault conditions for each
specified fault as a new model called fault model. The
system then consists of three different models called
process, specification and fault. The faulty parts of
the system are not permitted to do any job and the
controller tries to enforce the specifications by other
parts of the system. With this method, the controller
reconfiguration problem for fault-tolerant control of
discrete event systems converts to the problem of
synchronizing the process, specification and fault
model. We must synthesize a supervisor that can enforce
both specifications and faults status. If this
supervisor can be determined, we can achieve a
fault-tolerant controller. Implementing both
specification and fault models in the system, may lead
to a large number of forbidden states and constraints
and so on a more complicated forbidden states problem
must be solved. The application of constraints
simplification methods is shown. By the existing
methods for offline simplifying of constraints, we can
arrive at a simplified fault tolerant controller.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mhamdi:2013:FMT,
author = "Tarek Mhamdi and Osman Hasan and Sofi{\`e}ne Tahar",
title = "Formalization of Measure Theory and {Lebesgue}
Integration for Probabilistic Analysis in {HOL}",
journal = j-TECS,
volume = "12",
number = "1",
pages = "13:1--13:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406349",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Dynamic systems that exhibit probabilistic behavior
represent a large class of man-made systems such as
communication networks, air traffic control, and other
mission-critical systems. Evaluation of quantitative
issues like performance and dependability of these
systems is of paramount importance. In this paper, we
propose a generalized methodology to formally reason
about probabilistic systems within a theorem prover. We
present a formalization of measure theory in the HOL
theorem prover and use it to formalize basic concepts
from the theory of probability. We also use the
Lebesgue integration to formalize statistical
properties of random variables. To illustrate the
practical effectiveness of our methodology, we formally
prove classical results from the theories of
probability and information and use them in a data
compression application in HOL.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Khalgui:2013:RRE,
author = "Mohamed Khalgui and Olfa Mosbahi and Zhiwu Li",
title = "Runtime Reconfigurations of Embedded Controllers",
journal = j-TECS,
volume = "12",
number = "1",
pages = "14:1--14:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406350",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The article deals with Reconfigurable Embedded Control
Systems following different Component-based
Technologies and/or Architecture Description Languages
used today in Industry. We define a Control Component
as a software unit to support control tasks of the
system which is assumed to be a network of components
with precedence constraints. We define an agent-based
architecture to handle automatic reconfigurations under
well-defined conditions by creating, deleting or
updating components to bring the whole system into safe
and optimal behaviors. To cover all possible
reconfiguration forms, we model the agent by nested
state machines according to the formalism Net
Condition/Event Systems (abbr. NCES) which is an
extension of Petri nets. We apply in addition a model
checking to verify functional and extra-functional
properties according to the temporal logic
``Computation Tree Logic'' (abbr. CTL). The goal is to
check the agent's reactivity after any evolution of the
environment. Several complex networks can implement the
system such that each one is executed at a given time
when a corresponding reconfiguration scenario is
automatically applied by the agent. To check the
correctness of each one of them, we apply in several
steps a refinement-based approach that automatically
specifies feasible Control Components according to
NCES. The model checker SESA is automatically applied
in each step to verify deadlock properties of new
generated components, and is manually used to verify
CTL-based properties according to user requirements.
Two Industrial Benchmark Production Systems FESTO and
EnAS available in our research laboratory are applied
to explain the article's contributions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mery:2013:FSM,
author = "Dominique M{\'e}ry and Neeraj Kumar Singh",
title = "Formal Specification of Medical Systems by Proof-Based
Refinement",
journal = j-TECS,
volume = "12",
number = "1",
pages = "15:1--15:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406351",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Formal methods have emerged as an alternative approach
to ensuring quality and correctness of highly critical
systems, overcoming limitations of traditional
validation techniques such as simulation and testing.
We propose a refinement-based methodology for complex
medical systems design, which possesses all the
required key features. A refinement-based combined
approach of formal verification, model validation using
a model-checker and refinement chart is proposed in
this methodology for designing a high-confidence
medical device. Furthermore, we show the effectiveness
of this methodology for the design of a cardiac
pacemaker system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mosbahi:2013:CFM,
author = "Olfa Mosbahi",
title = "Combining Formal Methods for the Development of
Reactive Systems",
journal = j-TECS,
volume = "12",
number = "1",
pages = "16:1--16:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406352",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article deals with the use of two verification
approaches: theorem proving and model checking. We
focus on the Event-B method by using its associated
theorem proving tool (Click\_n\_Prove), and on the
language TLA$^+$ by using its model checker TLC. By
considering the limitation of the Event-B method to
invariance properties, we propose to apply the language
TLA$^+$ to verify liveness properties on a software
behavior. We extend first the expressivity and the
semantics of a B model (called temporal B model) to
deal with the specification of fairness and eventuality
properties. Second, we give transformation rules from a
temporal B model into a TLA$^+$ module. We present in
particular, our prototype system called B2TLA$^+$, that
we have developed to support this transformation; then
we can verify these properties thanks to the model
checker TLC on finite state systems. For the
verification of infinite-state systems, we propose the
use of the predicate diagrams. We illustrate our
approach on a case study of a parcel sorting system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sunder:2013:FVD,
author = "Christoph S{\"u}nder and Valeriy Vyatkin and Alois
Zoitl",
title = "Formal Verification of Downtimeless System Evolution
in Embedded Automation Controllers",
journal = j-TECS,
volume = "12",
number = "1",
pages = "17:1--17:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406353",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a new formal approach to
validation of on-the-fly modification of control
software in automation systems. The concept of
downtimeless system evolution (DSE) is introduced. The
DSE is essentially based on the use of IEC 61499 system
architecture and formal modeling and verification of
the hardware and software of an automation device. The
validation is performed by means of two complementary
techniques: analytic calculations and formal
verification by model-checking.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Khalgui:2013:DRA,
author = "Mohamed Khalgui",
title = "Distributed Reconfigurations of Autonomous {IEC61499}
Systems",
journal = j-TECS,
volume = "12",
number = "1",
pages = "18:1--18:??",
month = jan,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2406336.2406354",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Jan 25 17:38:43 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The article deals with Distributed Multiagent
Reconfigurable Embedded Control Systems following the
International Industrial Standard IEC61499 in which a
Function Block (Abbreviated by FB) is an
event-triggered software component owning data and a
control system is a network of distributed blocks. We
define a multiagent embedded architecture in which a
Reconfiguration Agent is affected to each device of the
execution environment to apply local reconfigurations,
and a Coordination Agent is proposed for coordination
between devices in order to guarantee safe and coherent
distributed reconfigurations. A Communication Protocol
is proposed to handle such coordination by using
well-defined Coordination Matrices. A prototype is
developed to simulate the whole architecture when
faults occur or system's optimizations are applied. We
specify Reconfiguration Agents to be modeled by nested
state machines, and the Coordination Agent according to
the formalism Net Condition/Event Systems (Abbreviated
by NCES) which is an extension of Petri nets. To allow
correct and coherent distributed reconfigurations, we
check all possible interactions between controllers by
verifying that whenever a reconfiguration is applied in
a device, the Coordination Agent and other concerned
devices react as described in user requirements. We
propose finally XML-based implementations of both
Coordination and Reconfiguration Agents according to
the technology IEC61499. The article's contributions
are applied to two Benchmark Production Systems
available in our research laboratory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2013:ISS,
author = "Jian-Jia Chen and Maurizio Palesi",
title = "Introduction to the special section on
{ESTIMedia'12}",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "32:1--32:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435228",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "32",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nikitakis:2013:NLP,
author = "Antonis Nikitakis and Savvas Papaioannou and Ioannis
Papaefstathiou",
title = "A novel low-power embedded object recognition system
working at multi-frames per second",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "33:1--33:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435229",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "One very important challenge in the field of
multimedia is the implementation of fast and detailed
Object Detection and Recognition systems. In
particular, in the current state-of-the-art mobile
multimedia systems, it is highly desirable to detect
and locate certain objects within a video frame in real
time. Although a significant number of Object Detection
and Recognition schemes have been developed and
implemented, triggering very accurate results, the vast
majority of them cannot be applied in state-of-the-art
mobile multimedia devices; this is mainly due to the
fact that they are highly complex schemes that require
a significant amount of processing power, while they
are also time consuming and very power hungry. In this
article, we present a novel FPGA-based embedded
implementation of a very efficient object recognition
algorithm called Receptive Field Cooccurrence
Histograms Algorithm (RFCH). Our main focus was to
increase its performance so as to be able to handle the
object recognition task of today's highly sophisticated
embedded multimedia systems while keeping its energy
consumption at very low levels. Our low-power embedded
reconfigurable system is at least 15 times faster than
the software implementation on a low-voltage high-end
CPU, while consuming at least 60 times less energy. Our
novel system is also 88 times more energy efficient
than the recently introduced low-power multi-core Intel
devices which are optimized for embedded systems. This
is, to the best of our knowledge, the first system
presented that can execute the complete complex object
recognition task at a multi frame per second rate while
consuming minimal amounts of energy, making it an ideal
candidate for future embedded multimedia systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhai:2013:MSA,
author = "Jiali Teddy Zhai and Hristo Nikolov and Todor
Stefanov",
title = "Mapping of streaming applications considering
alternative application specifications",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "34:1--34:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435230",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Streaming applications often require a parallel Model
of Computation (MoC) to specify their application
behavior and to facilitate mapping onto Multi-Processor
System-on-Chip (MPSoC) platforms. Various performance
requirements and resource budgets of embedded systems
ask for an efficient design space exploration (DSE)
approach to select the best design from a design space
consisting of a large number of design choices.
However, existing DSE approaches explore the design
space that includes only architecture and mapping
alternatives for an initial application specification
given by the application designer. In this article, we
first show that a design often might not be optimal if
alternative specifications of a given application are
not taken into account. We further argue that the best
alternative specification consists of only independent
and load-balanced application tasks. Based on the
Polyhedral Process Network (PPN) MoC, we present an
approach to analyze and transform an initial PPN to an
alternative one that contains only independent
processes if possible. Finally, by prototyping
real-life applications on both FPGA-based MPSoCs and
desktop multi-core platforms, we demonstrate that
mapping the alternative application specification
results in a large performance gain compared to those
approaches, in which alternative application
specifications are not taken into account.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Geuns:2013:SST,
author = "Stefan J. Geuns and Joost P. H. M. Hausmans and Marco
J. G. Bekooij",
title = "Sequential specification of time-aware stream
processing applications",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "35:1--35:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435231",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Automatic parallelization of Nested Loop Programs
(NLPs) is an attractive method to create embedded
real-time stream processing applications for multi-core
systems. However, the description and parallelization
of applications with a time dependent functional
behavior has not been considered in NLPs. In such a
description, semantic information about time dependent
behavior must be made available for the compiler, such
that an optimized time independent implementation can
be generated automatically. This article introduces
language constructs with temporal semantics to NLPs.
Using these language constructs, time dependent
applications can be specified and a corresponding
data-driven implementation can be generated for use on
a multi-core system. Despite that these time-aware
language constructs can be data-dependent, the
application remains functionally deterministic.
Pipelining is exploited to increase the throughput of
an application. The media access control (MAC) protocol
of an IEEE 802.11p WLAN transceiver is used to
illustrate the relevance and applicability of the
introduced concepts.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "35",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2013:LAB,
author = "Daeyoung Lee and Hyunok Oh",
title = "A lifetime aware buffer assignment method for
streaming applications on {DRAM\slash PRAM} hybrid
memory",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "36:1--36:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435232",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article proposes a lifetime aware buffer
assignment method for streaming applications like
multimedia specified in a synchronous dataflow (SDF)
graph on a DRAM/PRAM hybrid memory in which the
endurance of PRAM is limited. We determine whether
buffers are assigned to DRAM or PRAM to minimize the
writing frequency of PRAM. To solve the problems, we
formulate them using Answer Set Programming.
Experimental results show that the proposed approach
increases the PRAM lifetime by 63\% compared with no
optimization, and shows the tradeoff between PRAM and
DRAM size to guarantee a lifetime constraint.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chung:2013:EUE,
author = "Yi-Fan Chung and Yin-Tsung Lo and Chung-Ta King",
title = "Enhancing user experiences by exploiting energy and
launch delay trade-off of mobile multimedia
applications",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "37:1--37:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435233",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Launch delay has been an important factor affecting
users' experiences in mobile multimedia applications.
To launch applications quickly, modern mobile systems
such as Android usually keep inactive applications in
the background and manage them through an LRU-based
activity stack. Whenever the user wants to run and
interact with a background application again, that
application can be switched back into the foreground
quickly from the activity stack without delay in
initializing the applications anew. Since background
multimedia applications often continuously consume the
battery power of the smart phone, the challenge is to
effect a balance between application launch delay and
battery lifetime. In this article, we propose
innovative application management strategies that
terminate ``unbeneficial'' background applications to
save energy and pre-launch ``beneficial'' applications
to improve the application launch delay. The proposed
strategies are evaluated through a trace-driven
simulation and a real experiment. The results show that
the average application launch delay can be reduced by
15\% while the average battery lifetime is increased by
18\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{DeSutter:2013:ISS,
author = "Bjorn {De Sutter} and Jan Vitek",
title = "Introduction to the special section on {LCTES'11}",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "38:1--38:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435234",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Berthier:2013:SPD,
author = "Nicolas Berthier and Florence Maraninchi and Laurent
Mounier",
title = "Synchronous programming of device drivers for global
resource control in embedded operating systems",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "39:1--39:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435235",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In embedded systems, controlling a shared resource
like a bus, or improving a property like power
consumption, may be hard to achieve when programming
device drivers individually. In this article, we
propose a global resource control approach, based on a
centralized view of the devices' states. The solution
we propose operates on the hardware/software interface.
It involves a simple adaptation of the application
level, to communicate with the hardware via a control
layer. The control layer itself is built from a set of
simple automata: the device drivers, whose states
correspond to functional or power consumption modes,
and a controller to enforce global properties. All
these automata are programmed using a synchronous
language, and compiled into a single piece of C code.
We take as example the node of a sensor network. We
explain the approach in details, demonstrate its use
and benefits with an event-driven or multithreading
operating system, and draw guidelines for its use in
other contexts.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cullmann:2013:CPA,
author = "Christoph Cullmann",
title = "Cache persistence analysis: Theory and practice",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "40:1--40:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435236",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "To compute a worst-case execution time (WCET) estimate
for a program, the architectural effects of the
underlying hardware must be modeled. For modern
processors this results in the need for a cache and
pipeline analysis. The timing-relevant result of the
cache analysis is the categorization of the accesses to
cached memory. Categorizations that are obtainable by
the well-known must and may cache analysis [Ferdinand
1997] are always-hit, always-miss and not-classified.
The cache persistence analysis tries to provide
additional information for the not-classified case to
limit the number of misses. There exists a cache
persistence analysis by Ferdinand and Wilhelm based on
abstract interpretation computing these
classifications. In this article, we present a
correctness issue with this analysis. To fix this
issue, we propose two new abstract interpretation based
persistence analyses and show their safety. One is
based on the known may analysis and a second one on the
concept of conflict counting. For fully timing
compositional architectures [Wilhelm et al. 2009] the
persistence information is straightforward to use. We
will apply the concepts of persistence analysis for the
first time to state-of-the-art architectures that
exhibit both timing anomalies and domino effects. Such
architectures do not allow the analyzer to quantify the
costs of a single cache hit or miss in isolation. To
make the usage of the persistence information feasible,
we integrate the presented novel persistence analyses
together with a novel path analysis approach into the
industrially used WCET analyzer aiT.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sifakis:2013:ISS,
author = "Joseph Sifakis and Lothar Thiele and Reinhard
Wilhelm",
title = "Introduction to the special section on rigorous
embedded systems design",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "41:1--41:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435237",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Reineke:2013:SCR,
author = "Jan Reineke and Daniel Grund",
title = "Sensitivity of cache replacement policies",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "42:1--42:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435238",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The sensitivity of a cache replacement policy
expresses to what extent the execution history may
influence the number of cache hits and misses during
program execution. We present an algorithm to compute
the sensitivity of a replacement policy. We have
implemented this algorithm in a tool called Relacs
that can handle a large class of replacement policies
including LRU, FIFO, PLRU, and MRU. Sensitivity
properties obtained with Relacs demonstrate that the
execution history can have a strong impact on the
number of cache hits and misses if FIFO, PLRU, or MRU
is used. A simple model of execution time is used to
evaluate the impact of cache sensitivity on measured
execution times. The model shows that measured
execution times may strongly underestimate the
worst-case execution time for FIFO, PLRU, and MRU.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jeong:2013:RRM,
author = "Jinkyu Jeong and Hwanju Kim and Jeaho Hwang and
Joonwon Lee and Seungryoul Maeng",
title = "Rigorous rental memory management for embedded
systems",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "43:1--43:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435239",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Memory reservation in embedded systems is a prevalent
approach to provide a physically contiguous memory
region to its integrated devices, such as a camera
device and a video decoder. Inefficiency of the memory
reservation becomes a more significant problem in
emerging embedded systems, such as smartphones and
smart TVs. Many ways of using these systems increase
the idle time of their integrated devices, and
eventually decrease the utilization of their reserved
memory. In this article, we propose a scheme to
minimize the memory inefficiency caused by the memory
reservation. The memory space reserved for a device can
be rented for other purposes when the device is not
active. For this scheme to be viable, latencies
associated with reallocating the memory space should be
minimal. Volatile pages are good candidates for such
page reallocation since they can be reclaimed
immediately as they are needed by the original device.
We also provide two optimization techniques,
lazy-migration and adaptive-activation. The former
increases the lowered utilization of the rental memory
by our volatile page allocations, and the latter saves
active pages in the rental memory during the
reallocation. We implemented our scheme on a smartphone
development board with the Android Linux kernel. Our
prototype has shown that the time for the return
operation is less than 0.77 seconds in the tested
cases. We believe that this time is acceptable to
end-users in terms of transparency since the time can
be hidden in application initialization time. The
rental memory also brings throughput increases ranging
from 2\% to 200\% based on the available memory and the
applications' memory intensiveness.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Vasilikos:2013:HSA,
author = "Vasileios Vasilikos and Georgios Smaragdos and
Christos Strydis and Ioannis Sourdis",
title = "Heuristic search for adaptive, defect-tolerant
multiprocessor arrays",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "44:1--44:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435240",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, new heuristic-search methods and
algorithms are presented for enabling highly efficient
and adaptive, defect-tolerant multiprocessor arrays. We
consider systems where a homogeneous multiprocessor
array lies on top of reconfigurable interconnects which
allow the pipeline stages of the processors to be
connected in all possible configurations. Considering
the multiprocessor array partitioned in substitutable
units at the granularity of pipeline stages, we employ
a variety of heuristic-search methods and algorithms to
isolate and replace defective units. The proposed
heuristics are designed for off-line execution and aim
at minimizing the performance overhead necessarily
introduced to the array by the interconnects' latency.
An empirical evaluation of the designed algorithms is
then carried out, in order to assess the targeted
problem and the efficacy of our approach. Our findings
indicate this to be a NP-complete computational
problem, however, our heuristic-search methods can
achieve, for the problem sizes we exhaustively
searched, 100\% accuracy in finding the optimal
solution among 10$^{19}$ possible candidates within 2.5
seconds. Alternatively, they can provide near-optimal
solutions at an accuracy which consistently exceeds
70\% (compared to the optimal solution) in only
10$^{-4}$ seconds.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Marinescu:2013:FSJ,
author = "Maria-Cristina Marinescu and C{\'e}sar S{\'a}nchez",
title = "Fusing statecharts and {Java}",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "45:1--45:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435241",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents FUSE, an approach for modeling
and implementing embedded software components which
starts from a main-stream programming language and
brings some of the key concepts of Statecharts as
first-class elements within this language. Our approach
provides a unified programming environment which not
only preserves some of the advantages of Statecharts'
formal foundation but also directly supports features
of object-orientation and strong typing. By specifying
Statecharts directly in FUSE we eliminate the
out-of-synch between the model and the generated code
and we allow the tuning and debugging to be done within
the same programming model. This article describes the
main language constructs of FUSE and presents its
semantics by translation into the Java programming
language. We conclude by discussing extensions to the
base language which enable the efficient static
checking of program properties.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hubner:2013:ISS,
author = "Michael H{\"u}bner",
title = "Introduction to the special section on multiprocessor
system-on-chip for cyber-physical systems",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "46:1--46:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435242",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Paulin:2013:PPP,
author = "Pierre G. Paulin and Ali Erdem {\"O}zcan and Vincent
Gagn{\'e} and Bruno Lavigueur and Olivier Benny",
title = "Parallel programming patterns for multi-processor
{SoC}: Application to video processing",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "47:1--47:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435243",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Efficient, scalable and productive parallel
programming is a major challenge for exploiting the
future multi-processor SoC platforms. This article
presents the MultiFlex programming environment which
has been developed to address this challenge. It is
targeted for use on Platform 2012, a scalable
multi-processor fabric. The MultiFlex environment
supports high-level simulation, iterative platform
mapping, and includes tools for programming model aware
debug, trace, visualization and analysis. This article
focuses on the two classes of programming abstractions
supported in MultiFlex. The first is a set of Parallel
Programming Patterns (PPP) which offer a rich set of
programming abstractions for implementing efficient
data- and task-level parallel applications. The second
is a Reactive Task Management (RTM) abstraction, which
offers a lightweight C-based API to support dynamic
dispatching of small grain tasks on tightly coupled
parallel processing resources. The use of the MultiFlex
native programming model is illustrated through the
capture and mapping of two representative video
applications. The first is a high-quality rescaling
(HQR) application on a multi-processor platform. We
present the details of the optimization process which
was required for mapping the HQR application, for which
the reference code requires 350 GIPS (giga instructions
per second), onto a 16 processor cluster. Our results
show that the parallel implementation using the PPP
model offers almost linear acceleration with respect to
the number of processing elements. The second
application is a high-definition VC-1 decoder. For this
application, we illustrate two different parallel
programming model variants, one using PPPs, the other
based on RTM. These two versions are mapped onto two
variants of a homogeneous version of the Platform 2012
multi-core fabric.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "47",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Thiele:2013:PTT,
author = "Lothar Thiele and Lars Schor and Iuliana Bacivarov and
Hoeseok Yang",
title = "Predictability for timing and temperature in
multiprocessor system-on-chip platforms",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "48:1--48:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435244",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "High computational performance in multiprocessor
system-on-chips (MPSoCs) is constrained by the
ever-increasing power densities in integrated circuits,
so that nowadays MPSoCs face various thermal issues.
For instance, high chip temperatures may lead to
long-term reliability concerns and short-term
functional errors. Therefore, the new challenge in
designing embedded real-time MPSoCs is to guarantee the
final performance and correct function of the system,
considering both functional and non-functional
properties. One way to achieve this is by ruling out
mapping alternatives that do not fulfill requirements
on performance or peak temperature already in early
design stages. In this article, we propose a
thermal-aware optimization framework for mapping
real-time applications onto MPSoC platforms. The
performance and temperature of mapping candidates are
evaluated by formal temporal and thermal analysis
models. To this end, analysis models are automatically
generated during design space exploration, based on the
same specifications as used for software synthesis. The
analysis models are automatically calibrated with
performance data reflecting the execution of the system
on the target platform. The data is automatically
obtained prior to design space exploration based on a
set of benchmark mappings. Case studies show that the
performance and temperature requirements are often
conflicting goals and optimizing them together leads to
major benefits in terms of a guaranteed and predictable
high performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Davare:2013:MDE,
author = "Abhijit Davare and Douglas Densmore and Liangpeng Guo
and Roberto Passerone and Alberto L.
Sangiovanni-Vincentelli and Alena Simalatsar and Qi
Zhu",
title = "{metroII}: a design environment for cyber-physical
systems",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "49:1--49:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435245",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cyber-Physical Systems are integrations of computation
and physical processes and as such, will be
increasingly relevant to industry and people. The
complexity of designing CPS resides in their
heterogeneity. Heterogeneity manifest itself in
modeling their functionality as well as in the
implementation platforms that include a multiplicity of
components such as microprocessors, signal processors,
peripherals, memories, sensors and actuators often
integrated on a single chip or on a small package such
as a multi-chip module. We need a methodology, tools
and environments where heterogeneity can be dealt with
at all levels of abstraction and where different tools
can be integrated. We present here Platform-Based
Design as the CPS methodology of choice and metroII, a
design environment that supports it. We present the
metamodeling approach followed in metroII, how to
couple the functionality and implementation platforms
of CPS, and the simulation technology that supports the
analysis of CPS and of their implementation. We also
present examples of use and the integration of metroII
with another popular design environment developed at
Verimag, BIP.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "49",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bogdan:2013:PCH,
author = "Paul Bogdan and Siddharth Jain and Radu Marculescu",
title = "Pacemaker control of heart rate variability: a cyber
physical system perspective",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "50:1--50:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435246",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cardiac diseases, like those related to abnormal heart
rate activity, have an enormous economic and
psychological impact worldwide. The approaches used to
control the behavior of modern pacemakers ignore the
fractal nature of heart rate activity. The purpose of
this article is to present a Cyber Physical System
approach to pacemaker design that exploits precisely
the fractal properties of heart rate activity in order
to design the pacemaker controller. Towards this end,
we solve a finite horizon optimal control problem based
on the heartbeat time series and show that this control
problem can be converted into a system of linear
equations. We also compare and contrast the performance
of the fractal optimal control problem under six
different cost functions. Finally, to get an idea of
hardware complexity, we implement the fractal optimal
controller on a Virtex4 FPGA and report some
preliminary results in terms of area overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "50",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gohringer:2013:RAN,
author = "Diana G{\"o}hringer and Lukas Meder and Oliver Oey and
J{\"u}rgen Becker",
title = "Reliable and adaptive network-on-chip architectures
for cyber physical systems",
journal = j-TECS,
volume = "12",
number = "1s",
pages = "51:1--51:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435227.2435247",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 19 07:54:21 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Reliability in embedded systems is crucial for many
application domains. Especially, for safety critical
application, as they can be found in the automotive and
avionic domain, a high reliability has to be ensured.
The technology in chip production undergoes a steady
shrinking process from nowadays 25 nanometers. It is
proven that coming technologies, which are much
smaller, can have a higher defect rate after
production, but also at runtime. The physical effects
at runtime come from a higher susceptibility for
radiation. Since the silicon die of a field
programmable gate array (FPGA) includes a high amount
of physical wiring, the radiation effect plays here a
major role. Therefore, this article describes an
approach of a reliable Network-on-Chip (NoC) which can
be used for an FPGA-based system. The article describes
the concept and the physical realization of this NoC
and evaluates its reliability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "51",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2013:SIE,
author = "Jongsung Kim and Javier A. Barria and Morris Chang and
Victor C. M. Leung",
title = "Special issue on embedded systems for interactive
multimedia services {(ES-IMS)}",
journal = j-TECS,
volume = "12",
number = "2",
pages = "19:1--19:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423637",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2013:ELS,
author = "Yeong-Sheng Chen and Yun-Ju Ting and Chih-Heng Ke and
Naveen Chilamkurti and Jong Hyuk Park",
title = "Efficient localization scheme with ring overlapping by
utilizing mobile anchors in wireless sensor networks",
journal = j-TECS,
volume = "12",
number = "2",
pages = "20:1--20:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423638",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This study proposes an efficient localization scheme
in wireless sensor networks. The proposed scheme
utilizes mobile anchors and is based on ring
overlapping. In a wireless sensor network, the nodes
that know their locations are called reference nodes,
and the other nodes that are without the knowledge of
their locations are called blind nodes. To localize a
certain blind node, by comparing the relative RSSI
(Received Signal Strength Indicator) values among
nodes, mobile beacons are utilized to find out the
rings that are centered at a reference node and contain
the blind node. These rings are called B-Rings. Since
the mobile anchors and the reference nodes know their
own locations, the B-Rings can be precisely derived.
Moreover, by using multiple mobile beacons, the widths
of the B-Rings can be further minimized; and then by
overlapping them, the location of the blind nodes can
be efficiently estimated. Most existing localization
schemes that utilize mobile anchors let the mobile
anchors move randomly. In contrast, the proposed scheme
provides regular and simple movement mechanisms for the
mobile anchors. Thus, the mobile anchors consume less
energy than the other schemes, in which the mobile
anchors move randomly. Analytical analysis and
simulation results show that the proposed localization
mechanism can achieve better location accuracy as well
as less movement length of the mobile anchor than the
other existing related approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sun:2013:DEI,
author = "Hung-Min Sun and Chi-Yao Weng and Shiuh-Jeng Wang and
Cheng-Hsing Yang",
title = "Data embedding in image-media using weight-function on
modulo operations",
journal = j-TECS,
volume = "12",
number = "2",
pages = "21:1--21:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423639",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Multimedia hiding system is to embed message behind
the specified media, but it is still kept normal in
media representations via human sensitive organizations
without causing imperceptibility. In this article, we
propose a data hiding system by means of flexible
exploiting modification directions to achieve safer
message concealments in image-media. In our scheme, $n$
cover-pixels are flexibly chosen on modulo operations
to embed a secret $s$, where $ n = \lceil \log_3 (s)
\rceil $. The varied pixel values associated with the
chosen $n$ pixels are only changed among $ [ - 1, 1] $.
Because the numbers of adjustable pixels are much
greater than the pixels in the past scheme, our scheme
is able to obtain a higher embedded ratio in response
to the capacity requirements of information hiding
systems. In addition, we also applied the
statistics-steganalyzers to demonstrate that our scheme
has accomplishment not only higher capacity but also
kept the robustness against the blind steganalyzers.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Seo:2013:AIG,
author = "Sanghyun Seo and Seungtaek Ryoo and Kyunghyun Yoon",
title = "Artistic image generation for emerging multimedia
services by impressionist manner",
journal = j-TECS,
volume = "12",
number = "2",
pages = "22:1--22:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423640",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we propose the rendering framework
for painting-like image generation and general system
architecture for mobile device. Especially, we focused
on a color division method for generating
neo-impressionist images. The French painter, George
Seurat, introduced pointillism under the theory that
the individual pigments of colors on the canvas are
reconstructed on the human retina. Pointillism is a
painting technique in which many small brush strokes
are combined to form a picture and determines the color
of brush strokes based on the optical mixing of
juxtaposed colors. In order to express countless
separate dots, we form hierarchical points using Wang
Tiles contained points. Also palette will be
constructed using neo-impressionist colors. Based on
this palette, we propose color division algorithm that
distributes hierarchical point's color to pointillist
colors using probability function. Finally,
hierarchical points set that applied proposed color
division rule is converted into brush strokes that
possesses properties such as shape and direction. This
rendering algorithm is performed in our proposed
system. Our scheme is able to produce a painting with
artistic style and be applied to the various platform
having the different computing performance and display
resolution. This system also can be extended to various
imaging devices (IPTV, camera, smart phone, digital
photo frame and so on).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Park:2013:EEN,
author = "Sang Oh Park and Sung Jo Kim",
title = "{ENFFiS}: an enhanced {NAND} flash memory file system
for mobile embedded multimedia system",
journal = j-TECS,
volume = "12",
number = "2",
pages = "23:1--23:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423641",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Since the typical erase cycle limit of a NAND flash
memory's block is about 1,000,000, flash memory should
be erased as evenly as possible; otherwise, file system
hot spots will soon be worn out. This forces a NAND
flash memory file system to scan the whole flash memory
during its mount rather than saving frequently updated
file system information in a fixed area. Since the
mount time linearly increases with the size of NAND
flash memory, boot times of embedded systems are also
linearly increased. In addition, since data loss may
occur if a file system terminates abnormally due to
unexpected errors, a stability scheme for NAND flash
memory file system is in great demand. To resolve these
problems, this article suggests an extended logical
block called Exblock (Extended Block) and a table
called SNode (Snapshot Node) to reduce the mount time
and proposes a new journaling scheme to improve
stability for an enhanced file system for NAND flash
memory storage called ENFFiS (Enhanced NAND Flash
memory File System). It also proposes a new cache
policy to improve read/write performances. ENFFiS shows
better performance than existing file systems in terms
of reading, writing, mount time and stability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2013:TAT,
author = "Jiayin Li and Meikang Qiu and Jian-Wei Niu and
Laurence T. Yang and Yongxin Zhu and Zhong Ming",
title = "Thermal-aware task scheduling in {$3$D} chip
multiprocessor with real-time constrained workloads",
journal = j-TECS,
volume = "12",
number = "2",
pages = "24:1--24:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423642",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Chip multiprocessor (CMP) techniques have been
implemented in embedded systems due to tremendous
computation requirements. Three-dimension (3D) CMP
architecture has been studied recently for integrating
more functionalities and providing higher performance.
The high temperature on chip is a critical issue for
the 3D architecture. In this article, we propose an
online thermal prediction model for 3D chips. Using
this model, we propose novel task scheduling algorithms
based on rotation scheduling to reduce the peak
temperature on chip. We consider data dependencies,
especially inter-iteration dependencies that are not
well considered in most of the current thermal-aware
task scheduling algorithms. Our simulation results show
that our algorithms can efficiently reduce the peak
temperature up to 8.1$^\circ$C.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Paul:2013:VSI,
author = "Anand Paul and Bo-Wei Chen and Karunanithi
Bharanitharan and Jhing-Fa Wang",
title = "Video search and indexing with reinforcement agent for
interactive multimedia services",
journal = j-TECS,
volume = "12",
number = "2",
pages = "25:1--25:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423643",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this study, we present a video search and indexing
system based on the state support vector (SVM) network,
video graph, and reinforcement agent for recognizing
and organizing video events. In order to enhance the
recognition performance of the state SVM network, two
innovative techniques are presented: state transition
correction and transition quality estimation. The
classification results are also merged into the video
indexing graph, which facilitates the search speed. A
reinforcement algorithm with an efficient scheduling
scheme significantly reduces both the power consumption
and time. The experimental results show the proposed
state SVM network was able to achieve a precision rate
as high as 83.83\% and the query results of the
indexing graph reached 80\% accuracy. The experiments
also demonstrate the performance and feasibility of our
system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nam:2013:PAR,
author = "Yunyoung Nam and Seungmin Rho and Chulung Lee",
title = "Physical activity recognition using multiple sensors
embedded in a wearable device",
journal = j-TECS,
volume = "12",
number = "2",
pages = "26:1--26:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423644",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we present a wearable intelligence
device for activity monitoring applications. We
developed and evaluated algorithms to recognize
physical activities from data acquired using a 3-axis
accelerometer with a single camera worn on a body. The
recognition process is performed in two steps: at first
the features for defining a human activity are measured
by the 3-axis accelerometer sensor and the image sensor
embedded in a wearable device. Then, the physical
activity corresponding to the measured features is
determined by applying the SVM classifier. The 3-axis
accelerometer sensor computes the correlation between
axes and the magnitude of the FFT for other features of
an activity. Acceleration data is classified into nine
activity labels. Through the image sensor, multiple
optical flow vectors computed on each grid image patch
are extracted as features for defining an activity. In
the experiments, we showed that an overall accuracy
rate of activity recognition based our method was
92.78\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lim:2013:DRS,
author = "Seung-Ho Lim and Min Choi and Young Sik Jeong",
title = "Data reorganization for scalable video service with
embedded mobile devices",
journal = j-TECS,
volume = "12",
number = "2",
pages = "27:1--27:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423645",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recent development of high-speed wireless networks and
embedded systems has enabled the recording and delivery
of high-performance multimedia to heterogeneous mobile
users. To support heterogeneous mobile users with
high-quality multimedia services, scalable video coding
was introduced. In the scalable video coding (SVC),
through multidimensional scalability, all types of
these scalability can be exploited at the same time.
However, the generated video sequences of scalable
video coding are not adequate for mobile multimedia
service systems since its flexibility makes non
contiguous storing and retrieval of partial stream
data. In this article, we propose efficient scalable
video data reorganization for video servicing systems,
which consist of video server and mobile clients. For
video server, we reorganize scalable video streams
taking into account both of decoding dependency and
location in disk array storage, where disk array is
widely used for storage systems of video server. In the
mobile devices, we place substreams with the
consideration of NAND flash memory page and block
boundaries, which is storage for mobile devices. The
experimental results show that the proposed
reorganization of scalable video can improve the
performance of mobile multimedia service systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kang:2013:AEC,
author = "Hyeong-Ju Kang and Heesuk Seo and Jin Kwak",
title = "Area-efficient convolutional deinterleaver for mobile
{TV} receiver",
journal = j-TECS,
volume = "12",
number = "2",
pages = "28:1--28:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423646",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, a single-pointer structure is
proposed for the convolutional deinterleavers of mobile
TV receivers. To enhance the burst-error correcting
capability, the convolutional interleaving and
deinterleaving scheme is widely used in mobile TV
receivers. However, a convolutional deinterleaver
requires many pointer registers. This article
introduces a single-pointer structure that reduces the
number of pointer registers. Experimental results show
that the single-pointer structure reduces the area of
the convolutional deinterleaver by 70\% in a mobile TV
receiver.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bharanitharan:2013:DMS,
author = "K. Bharanitharan and Jiun-Ren Ding and Anand Paul and
Kuen-Ming Lee and Ting-Wei Hou",
title = "Dependable management system for ubiquitous camera
array service in an elder-care center",
journal = j-TECS,
volume = "12",
number = "2",
pages = "29:1--29:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423647",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The concept of smart homes (SH) has been extensively
popularized, and there are a lot of technologies that
need to be continuously utilized and integrated in such
a concept. In this article, some applied problems of
camera array (CA) in the SH are discussed and solved.
Determining how to build an effective management method
for CA in order to ensure that user privacy is not
encroached upon is an important issue. In SH, the
applications of CA are very diversified. We suggest
that a satisfactory management method of CA should be
based on the open service gateway initiative (OSGi)
that includes resource management and monitoring (RMM)
and UPnP security for the problems of resources and
privacy, respectively. Finally, an applied example of
CA is addressed in an elder-care center (EC).
Simulation results show that the management strategy
and application of CA based on an OSGi is
satisfactory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "29",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lai:2013:RBR,
author = "Chin-Feng Lai and Min Chen and Meikang Qiu and
Athanasios V. Vasilakos and Jong Hyuk Park",
title = "A {RF4CE}-based remote controller with interactive
graphical user interface applied to home automation
system",
journal = j-TECS,
volume = "12",
number = "2",
pages = "30:1--30:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423648",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With the increase in commercial electronic equipment
and its complicated control interfaces, how to design
an effective and user-friendly control interface has
become a topic for many researchers. This research
introduces two-directional communication of an
interactive graphical user interface on a universal
remote control (URC). It is different from current URCs
where users must often spend huge amounts of time
setting the command codes and encoding each device.
With the increase in the number of appliances that the
controller needs to manage and the complicated and
numerous control buttons, using such controllers often
causes difficulties for users. This research employs a
cross-platform with integration theories, so when a
user wants to connect an appliance, both the appliance
end and the controller end will build a two-directional
connection through pairing over Radio Frequency for
Consumer Electronics (RF4CE). After connection, the
system will automatically set the communication
protocol between the controller and the device. The
appliance will automatically transmit its current state
and service in the form of bundles to the controller,
then the controller will project it onto an LCD screen.
The controller can also show the number of appliances
connected to the current position of the user, allowing
the user to use one controller to control all home
appliances with ease, achieving a simplified and
instinctive control interface to build the integrated
control environment for commercial appliances.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Waluyo:2013:MQS,
author = "Agustinus Borgy Waluyo and David Taniar and Bala
Srinivasan and Wenny Rahayu",
title = "Mobile query services in a participatory embedded
sensing environment",
journal = j-TECS,
volume = "12",
number = "2",
pages = "31:1--31:??",
month = feb,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2423636.2423649",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Feb 28 06:57:27 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A participatory mobile sensing system is designed to
enable clients to voluntarily collect environmental
data using embedded sensors and a mobile device while
going about their daily activities. Due to the
spatio-temporal nature of the data, and the significant
benefits of the data to the general public, it is
necessary to employ an efficient and effective query
processing model for the mobile clients to access the
data that can be visualized via an interactive
multimedia interface. This article introduces a unified
on-demand and data broadcast model to serve queries in
the context of a mobile sensing system. The
contributions of this article include the following:
(i) it presents a novel data structure and indexing
method to support the system; (ii) it provides
flexibility for the client to issue query using
on-demand or broadcast channel according to the server
load and broadcast schedule; (iii) it enables new data
access and processing for the mobile client; and (iv)
it is designed for a multiple channels/receivers
environment in a 4G wireless network. The proposed
model uses a holistic query processing approach for the
mobile sensing system that offers substantial
efficiency and autonomy for mobile clients when
retrieving data. The results of the experiments
undertaken affirm the effectiveness of its
performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "31",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kirsch:2013:ISS,
author = "Christoph Kirsch and Vincent Mooney",
title = "Introduction to Special Section on Probabilistic
Embedded Computing",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "86:1--86:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465788",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Palem:2013:TYB,
author = "Krishna Palem and Avinash Lingamneni",
title = "Ten Years of Building Broken Chips: The Physics and
Engineering of Inexact Computing",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "87:1--87:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465789",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Well over a decade ago, many believed that an engine
of growth driving the semiconductor and computing
industries---captured nicely by Gordon Moore's
remarkable prophecy (Moore's law)---was speeding
towards a dangerous cliff-edge. Ranging from
expressions of concern to doomsday scenarios, the exact
time when serious hurdles would beset us varied quite a
bit---some of the more optimistic warnings giving
Moore's law until. Needless to say, a lot of people
have spent time and effort with great success to find
ways for substantially extending the time when we would
encounter the dreaded cliff-edge, if not avoiding it
altogether. Faced with this issue, we started
approaching this in a decidedly different manner---one
which suggested falling off the metaphorical cliff as a
design choice, but in a controlled way. This resulted
in devices that could switch and produce bits that are
correct, namely of having the intended value, only with
a probabilistic guarantee. As a result, the results
could in fact be incorrect. Such devices and associated
circuits and computing structures are now broadly
referred to as inexact designs, circuits, and
architectures. In this article, we will crystallize the
essence of inexactness dating back to 2002 through two
key principles that we developed: (i) that of admitting
error in a design in return for resource savings, and
subsequently (ii) making resource investments in the
elements of a hardware platform proportional to the
value of information they compute. We will also give a
broad overview of a range of inexact designs and
hardware concepts that our group and other groups
around the world have been developing since, based on
these two principles. Despite not being
deterministically precise, inexact designs can be
significantly more efficient in the energy they
consume, their speed of execution, and their area
needs, which makes them attractive in application
contexts that are resilient to error. Significantly,
our development of inexactness will be contrasted
against the rich backdrop of traditional approaches
aimed at realizing reliable computing from unreliable
elements, starting with von Neumann's influential
lectures and further developed by Shannon--Weaver and
others.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Misailovic:2013:PSP,
author = "Sasa Misailovic and Deokhwan Kim and Martin Rinard",
title = "Parallelizing Sequential Programs with Statistical
Accuracy Tests",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "88:1--88:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465790",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present QuickStep, a novel system for parallelizing
sequential programs. Unlike standard parallelizing
compilers (which are designed to preserve the semantics
of the original sequential computation), QuickStep is
instead designed to generate (potentially
nondeterministic) parallel programs that produce
acceptably accurate results acceptably often. The
freedom to generate parallel programs whose output may
differ (within statistical accuracy bounds) from the
output of the sequential program enables a dramatic
simplification of the compiler, a dramatic increase in
the range of applications that it can parallelize, and
a significant expansion in the range of parallel
programs that it can legally generate. Results from our
benchmark set of applications show that QuickStep can
automatically generate acceptably accurate and
efficient parallel programs---the automatically
generated parallel versions of five of our six
benchmark applications run between 5.0 and 7.8 times
faster on eight cores than the original sequential
versions. These applications and parallelizations
contain features (such as the use of modern
object-oriented programming constructs or desirable
parallelizations with infrequent but acceptable data
races) that place them inherently beyond the reach of
standard approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sartori:2013:ETE,
author = "John Sartori and Rakesh Kumar",
title = "Exploiting Timing Error Resilience in Processor
Architecture",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "89:1--89:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465791",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Escalating variations in modern CMOS designs have
become a threat to Moore's law. In light of the
increasing costs of standard worst-case design
practices, timing speculation has become a popular
approach for dealing with static and dynamic
non-determinism and increasing yield. Timing
speculative architectures allow conservative guardbands
to be relaxed, increasing efficiency at the expense of
occasional errors, which are corrected or tolerated by
an error resilience mechanism. Previous work has
proposed circuit- or design-level optimizations that
manipulate the error rate behavior of a design to
increase the efficiency of timing speculation. In this
article, we investigate whether architectural
optimizations can also manipulate error rate behavior
to significantly increase the effectiveness of timing
speculation. To this end, we demonstrate how error rate
behavior indeed depends on processor architecture and
that architectural optimizations can be used to
manipulate the error rate behavior of a processor.
Using timing speculation-aware architectural
optimizations, we demonstrate enhanced overscaling and
up to 29\% additional energy savings for processors
that employ Razor-based timing speculation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "89",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chippa:2013:MQV,
author = "Vinay K. Chippa and Kaushik Roy and Srimat T.
Chakradhar and Anand Raghunathan",
title = "Managing the Quality vs. Efficiency Trade-off Using
Dynamic Effort Scaling",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "90:1--90:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465792",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Several current and emerging applications do not have
a unique result for a given input; rather, functional
correctness is defined in terms of output quality.
Recently proposed design techniques exploit the
inherent resilience of such applications and achieve
improved efficiency (energy or performance) by
foregoing correct execution of all the constituent
computations. Hardware and software systems that are
thus designed may be viewed as scalable effort systems,
since they offer the capability to modulate the effort
that they expend towards computation, thereby allowing
for trade-offs between output quality and efficiency.
We propose the concept of Dynamic Effort Scaling (DES),
which refers to dynamic management of the control knobs
that are exposed by scalable effort systems. We argue
the need for DES by observing that the degree of
resilience often varies significantly across
applications, across datasets, and even within a
dataset. We propose a general conceptual framework for
DES by formulating it as a feedback control problem,
wherein the scaling mechanisms are regulated with the
goal of maintaining output quality at or above a
specified limit. We present an implementation of
Dynamic Effort Scaling for recognition and mining
applications and evaluate it for the support vector
machines and K-means clustering algorithms under
various application scenarios and datasets. Our results
clearly demonstrate the benefits of the proposed
approach---statically setting the scaling mechanisms
leads to either significant error overshoot or
significant opportunities for energy savings left on
the table unexploited. In contrast, DES is able to
effectively regulate the output quality while maximally
exploiting the time-varying resiliency in the
workload.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "90",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Salajegheh:2013:HWS,
author = "Mastooreh Salajegheh and Yue Wang and Anxiao (Andrew)
Jiang and Erik Learned-Miller and Kevin Fu",
title = "Half-Wits: Software Techniques for Low-Voltage
Probabilistic Storage on Microcontrollers with {NOR}
Flash Memory",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "91:1--91:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465793",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This work analyzes the stochastic behavior of writing
to embedded flash memory at voltages lower than
recommended by a microcontroller's specifications in
order to reduce energy consumption. Flash memory
integrated within a microcontroller typically requires
the entire chip to operate on a common supply voltage
almost twice as much as what the CPU portion requires.
Our software approach allows the flash memory to
tolerate a lower supply voltage so that the CPU may
operate in a more energy-efficient manner.
Energy-efficient coding algorithms then cope with flash
memory writes that behave unpredictably. Our
software-only coding algorithms (in-place writes,
multiple-place writes, RS-Berger codes, and slow
writes) enable reliable storage at low voltages on
unmodified hardware by exploiting the electrically
cumulative nature of half-written data in write-once
bits. For a sensor monitoring application using the
MSP430, coding with in-place writes reduces the overall
energy consumption by 34\%. In-place writes are
competitive when the time spent on low-voltage
operations such as computation are at least four times
greater than the time spent on writes to flash memory.
Our evaluation shows that tightly maintaining the
digital abstraction for storage in embedded flash
memory comes at a significant cost to energy
consumption with minimal gain in reliability. We find
our techniques most effective for embedded workloads
that have significant duty cycling, rare writes, or
energy harvesting.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "91",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Alaghi:2013:SSC,
author = "Armin Alaghi and John P. Hayes",
title = "Survey of Stochastic Computing",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "92:1--92:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465794",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Stochastic computing (SC) was proposed in the 1960s as
a low-cost alternative to conventional binary
computing. It is unique in that it represents and
processes information in the form of digitized
probabilities. SC employs very low-complexity
arithmetic units which was a primary design concern in
the past. Despite this advantage and also its inherent
error tolerance, SC was seen as impractical because of
very long computation times and relatively low
accuracy. However, current technology trends tend to
increase uncertainty in circuit behavior and imply a
need to better understand, and perhaps exploit,
probability in computation. This article surveys SC
from a modern perspective where the small size, error
resilience, and probabilistic features of SC may
compete successfully with conventional methodologies in
certain applications. First, we survey the literature
and review the key concepts of stochastic number
representation and circuit structure. We then describe
the design of SC-based circuits and evaluate their
advantages and disadvantages. Finally, we give examples
of the potential applications of SC and discuss some
practical problems that are yet to be solved.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "92",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lingamneni:2013:SPI,
author = "Avinash Lingamneni and Christian Enz and Krishna Palem
and Christian Piguet",
title = "Synthesizing Parsimonious Inexact Circuits through
Probabilistic Design Techniques",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "93:1--93:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465795",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The domain of inexact circuit design, in which
accuracy of the circuit can be exchanged for
substantial cost (energy, delay, and/or area) savings,
has been gathering increasing prominence of late owing
to a growing desire for reducing energy consumption of
the systems, particularly in the domain of embedded and
(portable) multimedia applications. Most of the
previous approaches to realizing inexact circuits
relied on scaling of circuit parameters (such as supply
voltage) taking advantage of an application's error
tolerance to achieve the cost and accuracy trade-offs,
thus suffering from acute drawbacks of considerable
implementation overheads that significantly reduced the
gains. In this article, two novel design approaches
called Probabilistic Pruning and Probabilistic Logic
Minimization are proposed to realize inexact circuits
with zero hardware overhead. Extensive simulations on
various architectures of critical datapath elements
demonstrate that each of the techniques can
independently achieve normalized gains as large as $ 2
\times $--$ 9.5 \times $ in energy-delay-area product
for relative error magnitude as low as $ 10^{-4} $--$
10^{-8} $ \% compared to corresponding conventional
correct circuits.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "93",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cazorla:2013:PPA,
author = "Francisco J. Cazorla and Eduardo Qui{\~n}ones and
Tullio Vardanega and Liliana Cucu and Benoit Triquet
and Guillem Bernat and Emery Berger and Jaume Abella
and Franck Wartel and Michael Houston and Luca
Santinelli and Leonidas Kosmidis and Code Lo and Dorin
Maxim",
title = "{PROARTIS}: Probabilistically Analyzable Real-Time
Systems",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "94:1--94:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465796",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Static timing analysis is the state-of-the-art
practice of ascertaining the timing behavior of
current-generation real-time embedded systems. The
adoption of more complex hardware to respond to the
increasing demand for computing power in
next-generation systems exacerbates some of the
limitations of static timing analysis. In particular,
the effort of acquiring (1) detailed information on the
hardware to develop an accurate model of its execution
latency as well as (2) knowledge of the timing behavior
of the program in the presence of varying hardware
conditions, such as those dependent on the history of
previously executed instructions. We call these
problems the timing analysis walls. In this
vision-statement article, we present probabilistic
timing analysis, a novel approach to the analysis of
the timing behavior of next-generation real-time
embedded systems. We show how probabilistic timing
analysis attacks the timing analysis walls; we then
illustrate the mathematical foundations on which this
method is based and the challenges we face in the
effort of efficiently implementing it. We also present
experimental evidence that shows how probabilistic
timing analysis reduces the extent of knowledge about
the execution platform required to produce
probabilistically accurate WCET estimations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "94",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Abbas:2013:PTL,
author = "Houssam Abbas and Georgios Fainekos and Sriram
Sankaranarayanan and Franjo Ivanci{\'c} and Aarti
Gupta",
title = "Probabilistic Temporal Logic Falsification of
Cyber-Physical Systems",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "95:1--95:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465797",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present a Monte-Carlo optimization technique for
finding system behaviors that falsify a metric temporal
logic (MTL) property. Our approach performs a random
walk over the space of system inputs guided by a
robustness metric defined by the MTL property.
Robustness is guiding the search for a falsifying
behavior by exploring trajectories with smaller
robustness values. The resulting testing framework can
be applied to a wide class of cyber-physical systems
(CPS). We show through experiments on complex system
models that using our framework can help automatically
falsify properties with more consistency as compared to
other means, such as uniform sampling.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "95",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Forte:2013:ETA,
author = "Domenic Forte and Ankur Srivastava",
title = "Energy- and Thermal-Aware Video Coding via
Encoder\slash Decoder Workload Balancing",
journal = j-TECS,
volume = "12",
number = "2s",
pages = "96:1--96:??",
month = may,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2465787.2465798",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jun 6 06:53:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Video coding and compression are essential components
of multimedia services but are known to be
computationally intensive and energy demanding.
Traditional video coding paradigms, predictive and
distributed video coding (PVC and DVC), result in
excessive computation at either the encoder (PVC) or
decoder (DVC). Several recent papers have proposed a
hybrid PVC/DVC codec which shares the video coding
workload between encoder and decoder. In this article,
we propose a controller for such hybrid coders that
considers energy and temperature to dynamically split
the coding workload of a system comprised of one
encoder and one decoder. We also present two heuristic
algorithms for determining safe operating temperatures
in the controller solution: (1) stable state thermal
modeling algorithm, which focuses on long term
temperatures, and (2) transient thermal modeling
algorithm, which is better for short-term thermal
behavior. Results show that the proposed algorithms
result in more balanced energy utilization, improve
overall system lifetime, and reduce operating
temperatures when compared to strictly PVC and DVC
systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "96",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Uzelac:2013:HBL,
  author          = {Vladimir Uzelac and Aleksandar Milenkovi{\'c}},
  title           = {Hardware-Based Load Value Trace Filtering for
                     On-the-Fly Debugging},
  journal         = j-TECS,
  volume          = {12},
  number          = {2s},
  pages           = {97:1--97:??},
  month           = may,
  year            = {2013},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2465787.2465799},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Jun 6 06:53:32 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Capturing program and data traces during program
                     execution unobtrusively on-the-fly is crucial in
                     debugging and testing of cyber-physical systems.
                     However, tracing a complete program unobtrusively is
                     often cost-prohibitive, requiring large on-chip trace
                     buffers and wide trace ports. This article describes a
                     new hardware-based load data value filtering technique
                     called Cache First-access Tracking. Coupled with an
                     effective variable encoding scheme, this technique
                     achieves a significant reduction of load data value
                     traces, from 5.86 to 56.39 times depending on the data
                     cache size, thus enabling cost-effective, unobtrusive
                     on-the-fly tracing and debugging.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {97},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Zhang:2013:SAE,
  author          = {Fengxiang Zhang and Alan Burns},
  title           = {Schedulability analysis of {EDF}-scheduled embedded
                     real-time systems with resource sharing},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {67:1--67:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Earliest Deadline First (EDF) is the most widely
                     studied optimal dynamic scheduling algorithm for
                     uniprocessor real-time systems. In the existing
                     literature, however, there is no complete exact
                     analysis for EDF scheduling when both resource sharing
                     and release jitter are considered. Since resource
                     sharing and release jitter are important
                     characteristics of embedded real-time systems, a solid
                     theoretical foundation should be provided for EDF
                     scheduled systems. In this paper, we extend traditional
                     processor demand analysis to let arbitrary deadline
                     real-time tasks share non-preemptable resources and
                     suffer release jitter. A complete and exact
                     schedulability analysis for EDF scheduled systems is
                     provided. This analysis is incorporated into QPA (Quick
                     Processor-demand Analysis) which provides an efficient
                     implementation of the exact test.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {67},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Ben-Asher:2013:UMP,
  author          = {Yosi Ben-Asher and Nadav Rotem},
  title           = {Using memory profile analysis for automatic synthesis
                     of pointers code},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {68:1--68:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {One of the main advantages of high-level synthesis
                     (HLS) is the ability to synthesize circuits that can
                     access multiple memory banks in parallel. Current HLS
                     systems synthesize parallel memory references based on
                     explicit array declarations in the source code. We
                     consider the need to synthesize not only array
                     references but also memory operations targeting
                     pointers and dynamic data structures. This paper
                     describes Automatic Memory Partitioning, a method for
                     automatically synthesizing general data structures
                     (arrays and pointers) into multiple memory banks for
                     increased parallelism and performance. We use source
                     code instrumentation to collect memory traces in order
                     to detect linear memory access patterns. The memory
                     traces are used to split data structures into disjoint
                     memory regions and determine which segments may benefit
                     from parallel memory access. We present an algorithm
                     for allocating memory segments into multiple memory
                     banks. Experiments show significant improvements in
                     performance while conserving the number of memory
                     banks.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {68},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Zhang:2013:RAB,
  author          = {Fumin Zhang and Zhenwu Shi and Shayok Mukhopadhyay},
  title           = {Robustness analysis for battery-supported
                     cyber-physical systems},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {69:1--69:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article establishes a novel analytical approach
                     to quantify robustness of scheduling and battery
                     management for battery supported cyber-physical
                     systems. A dynamic schedulability test is introduced to
                     determine whether tasks are schedulable within a finite
                     time window. The test is used to measure robustness of
                     a real-time scheduling algorithm by evaluating the
                     strength of computing time perturbations that break
                     schedulability at runtime. Robustness of battery
                     management is quantified analytically by an adaptive
                     threshold on the state of charge. The adaptive
                     threshold significantly reduces the false alarm rate
                     for battery management algorithms to decide when a
                     battery needs to be replaced.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {69},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
%%% NOTE(review): the fifth author was recorded as "Florian Thomaitiv",
%%% which looks like the surname "Thoma" fused with an affiliation
%%% acronym (ITIV) in the publisher metadata; corrected to "Florian
%%% Thoma" below -- confirm against the article PDF.  The abstract is
%%% kept exactly as supplied.
@article{Voros:2013:MHD,
  author          = {Nikolaos S. Voros and Michael H{\"u}bner and
                     J{\"u}rgen Becker and Matthias K{\"u}hnle and Florian
                     Thoma and Arnaud Grasset and Paul Brelet and
                     Philippe Bonnot and Fabio Campi and Eberhard
                     Sch{\"u}ler and Henning Sahlbach and Sean Whitty and
                     Rolf Ernst and Enrico Billich and Claudia Tischendorf
                     and Ulrich Heinkel and Frank Ieromnimon and Dimitrios
                     Kritharidis and Axel Schneider and Joachim Knaeblein
                     and Wolfram Putzke-R{\"o}ming},
  title           = {{MORPHEUS}: a heterogeneous dynamically reconfigurable
                     platform for designing highly complex embedded
                     systems},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {70:1--70:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Recently, system designers are facing the challenge of
                     developing systems that have diverse features, are more
                     complex and more powerful, with less power consumption
                     and reduced time to market. These contradictory
                     constraints have forced technology providers to pursue
                     design solutions that will allow design teams to meet
                     the above design targets. In that respect, this paper
                     introduces an innovative technology platform, called
                     MORPHEUS, which intents to provide complete design
                     framework for dealing with the aforementioned
                     challenges. MORPHEUS consists of a state of the art
                     architecture that encompasses heterogeneous
                     reconfigurable accelerators for implementing on the
                     same hardware architecture applications with varying
                     characteristics and a tool chain that, through a
                     software oriented approach, eases the implementation of
                     highly complex applications with heterogeneous
                     characteristics. The proposed approach has been tested
                     and evaluated through state of the art cases studies
                     borrowed from complementary application domains.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {70},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Crenne:2013:CMS,
  author          = {J{\'e}r{\'e}mie Crenne and Romain Vaslin and Guy
                     Gogniat and Jean-Philippe Diguet and Russell Tessier
                     and Deepak Unnikrishnan},
  title           = {Configurable memory security in embedded systems},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {71:1--71:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {System security is an increasingly important design
                     criterion for many embedded systems. These systems are
                     often portable and more easily attacked than
                     traditional desktop and server computing systems. Key
                     requirements for system security include defenses
                     against physical attacks and lightweight support in
                     terms of area and power consumption. Our new approach
                     to embedded system security focuses on the protection
                     of application loading and secure application
                     execution. During secure application loading, an
                     encrypted application is transferred from on-board
                     flash memory to external double data rate synchronous
                     dynamic random access memory (DDR-SDRAM) via a
                     microprocessor. Following application loading, the
                     core-based security technique provides both
                     confidentiality and authentication for data stored in a
                     microprocessor's system memory. The benefits of our low
                     overhead memory protection approaches are demonstrated
                     using four applications implemented in a
                     field-programmable gate array (FPGA) in an embedded
                     system prototyping platform. Each application requires
                     a collection of tasks with varying memory security
                     requirements. The configurable security core
                     implemented on-chip inside the FPGA with the
                     microprocessor allows for different memory security
                     policies for different application tasks. An average
                     memory saving of 63\% is achieved for the four
                     applications versus a uniform security approach. The
                     lightweight circuitry included to support application
                     loading from flash memory adds about 10\% FPGA area
                     overhead to the processor-based system and main memory
                     security hardware.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {71},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Liu:2013:AEE,
  author          = {Shaoshan Liu and Richard Neil Pittman and Alessandro
                     Forin and Jean-Luc Gaudiot},
  title           = {Achieving energy efficiency through runtime partial
                     reconfiguration on reconfigurable systems},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {72:1--72:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {One major advantage of reconfigurable computing
                     systems is their ability to reconfigure hardware at
                     runtime. In this paper, we study the feasibility of
                     achieving energy efficiency in reconfigurable computing
                     systems (e.g., FPGAs) through runtime partial
                     reconfiguration (PR) techniques. In the ideal scenario,
                     we use a hardware accelerator to accelerate certain
                     parts of the program execution; when the accelerator is
                     not active, we use partial reconfiguration to unload it
                     to reduce power consumption. Since the reconfiguration
                     process may introduce a high energy overhead, it is
                     unclear whether this approach is efficient. To approach
                     this problem, we first analytically identify the
                     conditions under which partial reconfiguration can
                     reduce energy consumption. Our results indicate that
                     the key to reduce partial reconfiguration energy
                     overhead is to minimize the time overhead of the
                     reconfiguration process. Based on this analysis, we
                     design and implement a fast reconfiguration engine that
                     achieves close-to-ideal throughput on Xilinx Virtex-4
                     FPGAs. Our fast reconfiguration engine utilizes a
                     master-slave DMA pair to stream data between the SRAM
                     and the Internal Configuration Access Port (ICAP). We
                     experimentally verify our proposed solutions and
                     compare our design to existing energy reduction
                     techniques, such as clock gating. The results of our
                     study show that by using partial reconfiguration to
                     eliminate the power consumption of the accelerator when
                     it is inactive, we can accelerate program execution and
                     at the same time reduce the overall energy consumption
                     by half.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {72},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Dong:2013:PRS,
  author          = {Qi Dong and Donggang Liu and Peng Ning},
  title           = {Providing {DoS} resistance for signature-based
                     broadcast authentication in sensor networks},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {73:1--73:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Recent studies have demonstrated that it is feasible
                     to perform public key cryptographic operations on
                     resource-constrained sensor platforms. However, the
                     significant energy consumption introduced by public key
                     operations makes any public key-based protocol an easy
                     target of Denial-of-Service (DoS) attacks. For example,
                     if digital signature schemes such as ECDSA are used
                     directly for broadcast authentication without further
                     protection, an attacker can simply broadcast fake
                     messages and force the receiving nodes to perform a
                     huge number of unnecessary signature verifications,
                     eventually exhausting their battery power. This paper
                     shows how to mitigate such DoS attacks when digital
                     signatures are used for broadcast authentication in
                     sensor networks. Specifically, this paper first
                     presents two filtering techniques, the group-based
                     filter and the key chain-based filter, to handle the
                     DoS attacks against signature verification. Both
                     methods can significantly reduce the number of
                     unnecessary signature verifications when a sensor node
                     is under DoS attacks. This paper then combines these
                     two filters and proposes a hybrid solution to further
                     improve the performance.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {73},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Falk:2013:RBQ,
  author          = {Joachim Falk and Christian Zebelein and Christian
                     Haubelt and J{\"u}rgen Teich},
  title           = {A rule-based quasi-static scheduling approach for
                     static islands in dynamic dataflow graphs},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {74:1--74:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, an efficient rule-based clustering
                     algorithm for static dataflow subgraphs in a dynamic
                     dataflow graph is presented. The clustered static
                     dataflow actors are quasi-statically scheduled, in such
                     a way that the global performance in terms of latency
                     and throughput is improved compared to a dynamically
                     scheduled execution, while avoiding the introduction of
                     deadlocks as generated by naive static scheduling
                     approaches. The presented clustering algorithm
                     outperforms previously published approaches by a faster
                     computation and more compact representation of the
                     derived quasi-static schedule. This is achieved by a
                     rule-based approach, which avoids an explicit
                     enumeration of the state space. A formal proof of the
                     correctness of the presented clustering approach is
                     given. Experimental results show significant
                     improvements in both, performance and code size,
                     compared to a state-of-the-art clustering algorithm.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {74},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Ost:2013:PAD,
  author          = {Luciano Ost and Marcelo Mandelli and Gabriel Marchesan
                     Almeida and Leandro Moller and Leandro Soares Indrusiak
                     and Gilles Sassatelli and Pascal Benoit and Manfred
                     Glesner and Michel Robert and Fernando Moraes},
  title           = {Power-aware dynamic mapping heuristics for {NoC}-based
                     {MPSoCs} using a unified model-based approach},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {75:1--75:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The mapping of tasks to processing elements of an
                     MPSoC has critical impact on system performance and
                     energy consumption. To cope with complex dynamic
                     behavior of applications, it is common to perform task
                     mapping during runtime so that the utilization of
                     processors and interconnect can be taken into account
                     when deciding the allocation of each task. This paper
                     has two major contributions, one of them targeting the
                     general problem of evaluating dynamic mapping
                     heuristics in NoC-based MPSoCs, and another focusing on
                     the specific problem of finding a task mapping that
                     optimizes energy consumption in those architectures.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {75},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Liu:2013:JVP,
  author          = {Tiantian Liu and Chun Jason Xue and Minming Li},
  title           = {Joint variable partitioning and bank selection
                     instruction optimization for partitioned memory
                     architectures},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {76:1--76:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {About 55\% of all CPUs sold in the world are 8-bit
                     microcontrollers or microprocessors which can only
                     access limited memory space without extending address
                     buses. Partitioned memory with bank switching is a
                     technique to increase memory size without extending
                     address buses. Bank Selection Instructions (BSLs) need
                     to be inserted into the original programs to modify the
                     bank register to point to the desired banks. These BSLs
                     introduce both code size and execution time overheads.
                     In this paper, we partition variables into different
                     banks and insert BSLs at different positions of
                     programs so that the overheads can be minimized.
                     Minimizing speed (execution time) overhead and
                     minimizing space (code size) overhead are two
                     objectives investigated in this paper. A multi-copy
                     approach is also proposed to store multiple copies of
                     several variables on different banks when the memory
                     space allows. It takes the read/write properties of
                     variables into consideration and achieves more BSL
                     overhead reduction. Experiments show that the proposed
                     algorithms can reduce BSL overheads effectively
                     compared to state-of-the-art techniques.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {76},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Hu:2013:WAR,
  author          = {Jingtong Hu and Chun Jason Xue and Qingfeng Zhuge and
                     Wei-Che Tseng and Edwin H.-M. Sha},
  title           = {Write activity reduction on non-volatile main memories
                     for embedded chip multiprocessors},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {77:1--77:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Recent advances in circuit and semiconductor
                     technologies have pushed Non-Volatile Memory (NVM)
                     technologies into a new era. These technologies exhibit
                     appealing properties such as low power consumption,
                     non-volatility, shock-resistivity, and high density.
                     However, there are challenges to which we need answers
                     in the road of applying non-volatile memories as main
                     memory in embedded computer systems. First, when
                     compared with DRAM, NVMs have a limited number of
                     write/erase cycles. Second, write activities on NVM are
                     more expensive than DRAM memory in terms of energy
                     consumption and access latency. Both challenges will
                     benefit from the reduction of the write activities on
                     the NVMs. In this paper, we target embedded Chip
                     Multiprocessors (CMPs) with Scratch Pad Memory (SPM)
                     and non-volatile main memory. We introduce scheduling,
                     data migration, and recomputation techniques to reduce
                     the number of write activities on NVMs. Experimental
                     results show that the proposed methods can reduce the
                     number of writes by 58.46\% on average, which means
                     that the NVM can last 2.8 times as long as before. For
                     Phase Change Memory (PCM), the lifetime is extended
                     from 2.5 years to about 7 years on average and 15 years
                     at the most. Also, the finish time of the tested
                     programs is reduced by an average of 38.07\%, and the
                     energy consumption is reduced by an average of
                     51.23\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {77},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Baruah:2013:PST,
  author          = {Sanjoy Baruah},
  title           = {Partitioning sporadic task systems upon
                     memory-constrained multiprocessors},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {78:1--78:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Most prior theoretical research on real-time
                     partitioning algorithms for multiprocessor platforms
                     has focused on ensuring that the cumulative computing
                     requirements of the tasks assigned to each processor
                     does not exceed the processor's processing power.
                     However, computing capacity is often not the only
                     limiting resource: on many multiprocessor platforms
                     each individual computing unit may have limited amounts
                     of multiple additional types of resources (such as
                     local memory) in addition to having limited processing
                     power. We present algorithms for partitioning a
                     collection of sporadic tasks, each characterized by a
                     WCET, a relative deadline, and a period, upon a
                     multiprocessor platform in a manner that is cognizant
                     of such additional constraints as well as the
                     processing capacity constraints.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {78},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Paolieri:2013:HRT,
  author          = {Marco Paolieri and J{\"o}rg Mische and Stefan Metzlaff
                     and Mike Gerdes and Eduardo Qui{\~n}ones and Sascha
                     Uhrig and Theo Ungerer and Francisco J. Cazorla},
  title           = {A hard real-time capable multi-core {SMT} processor},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {79:1--79:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Hard real-time applications in safety critical domains
                     require high performance and time analyzability.
                     Multi-core processors are an answer to these demands,
                     however task interferences make multi-cores more
                     difficult to analyze from a worst-case execution time
                     point of view than single-core processors. We propose a
                     multi-core SMT processor that ensures a bounded maximum
                     delay a task can suffer due to inter-task
                     interferences. Multiple hard real-time tasks can be
                     executed on different cores together with additional
                     non real-time tasks. Our evaluation shows that the
                     proposed MERASA multi-core provides predictability for
                     hard real-time tasks and also high performance for non
                     hard real-time tasks.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {79},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Yun:2013:DHS,
  author          = {Jeong-Han Yun and Chul-Joo Kim and Seonggun Kim and
                     Kwang-Moo Choe and Taisook Han},
  title           = {Detection of harmful schizophrenic statements in
                     {Esterel}},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {80:1--80:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In imperative synchronous languages, a statement is
                     called schizophrenic if it is executed more than once
                     in a single clock. When a schizophrenic statement is
                     translated into a circuit, the circuit can behave
                     abnormally because of the multiple executions. To solve
                     the problems caused by schizophrenic statements,
                     compilers duplicate the statements to avoid multiple
                     executions. Esterel is an imperative synchronous
                     language. Schizophrenic statements in Esterel are
                     considered to occur due to the instantaneous reentrance
                     of local signal declarations or parallel statements.
                     However, if the corresponding circuit of a
                     schizophrenic statement behaves normally, it is
                     harmless and thus curing is not necessary. In this
                     paper, we identify the conditions under which a
                     schizophrenic statement of the Esterel program must be
                     cured during circuit translation. We also propose an
                     algorithm to detect schizophrenic statements that have
                     to be cured on the control flow graphs (CFGs) of source
                     codes. Our algorithm detects all schizophrenic
                     statements that have to be cured and results in fewer
                     false alarms on the benchmark programs used in the
                     previous work. It is simple and based on the CFG of a
                     program, implying that it can be merged into existing
                     compilers easily.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {80},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Baek:2013:EEH,
  author          = {Seungjae Baek and Jongmoo Choi and Donghee Lee and Sam
                     H. Noh},
  title           = {Energy-efficient and high-performance software
                     architecture for storage class memory},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {81:1--81:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Recently, interest in incorporating Storage Class
                     Memory (SCM), which blurs the distinction between
                     memory and storage, into mainstream computing has been
                     increasing rapidly. In this paper, we address the
                     emerging questions regarding the use of SCM. Based on
                     an embedded platform that employs FeRAM, a type of SCM,
                     we present our findings. In summary, by introducing
                     SCM, power efficiency improves while performance is
                     degraded. We also show that such performance
                     degradations may be removed with operating system level
                     schemes that fully exploit the characteristics of SCM.
                     Finally, we present permanent computing that supports
                     lightweight system on/off capabilities by using SCM.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {81},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Lee:2013:HPL,
  author          = {Dongwon Lee and Marilyn Wolf and Shuvra S.
                     Bhattacharyya},
  title           = {High-performance and low-energy buffer mapping method
                     for multiprocessor {DSP} systems},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {82:1--82:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {When implementing digital signal processing (DSP)
                     applications onto multiprocessor systems, one
                     significant problem in the viewpoints of performance is
                     the memory wall. In this paper, to help alleviate the
                     memory wall problem, we propose a novel,
                     high-performance buffer mapping policy for
                     SDF-represented DSP applications on bus-based
                     multiprocessor systems that support the shared-memory
                     programming model. The proposed policy exploits the
                     bank concurrency of the DRAM main memory system
                     according to the analysis of hierarchical parallelism.
                     Energy consumption is also a critical parameter,
                     especially in battery-based embedded computing systems.
                     In this paper, we apply a synchronization back-off
                     scheme on the top of the proposed high-performance
                     buffer mapping policy to reduce energy consumption. The
                     energy saving is attained by minimizing the number of
                     non-essential synchronization transactions. We measure
                     throughput and energy consumption on both synthetic and
                     real benchmarks. The simulation results show that the
                     proposed buffer mapping policy is very useful in terms
                     of performance, especially in memory-intensive
                     applications where the total execution time of
                     computational tasks is relatively small compared to
                     that of memory operations. In addition, the proposed
                     synchronization back-off scheme provides a reduction in
                     the number of synchronization transactions without
                     degrading performance, which results in system energy
                     saving.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {82},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Tripakis:2013:CSD,
  author          = {Stavros Tripakis and Dai Bui and Marc Geilen and Bert
                     Rodiers and Edward A. Lee},
  title           = {Compositionality in synchronous data flow: Modular
                     code generation from hierarchical {SDF} graphs},
  journal         = j-TECS,
  volume          = {12},
  number          = {3},
  pages           = {83:1--83:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue May 28 17:38:27 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Hierarchical SDF models are not compositional: a
                     composite SDF actor cannot be represented as an atomic
                     SDF actor without loss of information that can lead to
                     rate inconsistency or deadlock. Motivated by the need
                     for incremental and modular code generation from
                     hierarchical SDF models, we introduce in this paper
                     DSSF profiles. DSSF (Deterministic SDF with Shared
                     FIFOs) forms a compositional abstraction of composite
                     actors that can be used for modular compilation. We
                     provide algorithms for automatic synthesis of
                     non-monolithic DSSF profiles of composite actors given
                     DSSF profiles of their sub-actors. We show how
                     different trade-offs can be explored when synthesizing
                     such profiles, in terms of compactness (keeping the
                     size of the generated DSSF profile small) versus
                     reusability (maintaining necessary information to
                     preserve rate consistency and deadlock-absence) as well
                     as algorithmic complexity. We show that our method
                     guarantees maximal reusability and report on a
                     prototype implementation.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {83},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@Article{Zimmerman:2013:MBR,
  author = {Andrew T. Zimmerman and Jerome P. Lynch and Frank T. Ferrese},
  title = {Market-based resource allocation for distributed data processing
    in wireless sensor networks},
  journal = j-TECS,
  volume = {12},
  number = {3},
  pages = {84:1--84:??},
  month = mar,
  year = {2013},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue May 28 17:38:27 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In recent years, improved wireless technologies have enabled
    the low-cost deployment of large numbers of sensors for a wide range of
    monitoring applications. Because of the computational resources
    (processing capability, storage capacity, etc.) collocated with each
    sensor in a wireless network, it is often possible to perform advanced
    data analysis tasks autonomously and in-network, eliminating the need
    for the post-processing of sensor data. With new parallel algorithms
    being developed for in-network computation, it has become necessary to
    create a framework in which all of a wireless network's scarce
    resources (CPU time, wireless bandwidth, storage capacity, battery
    power, etc.) can be best utilized in the midst of competing
    computational requirements. In this study, a market-based method is
    developed to autonomously distribute these scarce network resources
    across various computational tasks with competing objectives and/or
    resource demands. This method is experimentally validated on a network
    of wireless sensing prototypes, where it is shown to be capable of
    Pareto-optimally allocating scarce network resources. Then, it is
    applied to the real-world problem of rupture detection in shipboard
    chilled water systems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {84},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Mu:2013:POS,
  author = {Jingqing Mu and Karthik Shankar and Roman Lysecky},
  title = {Profiling and online system-level performance and power
    estimation for dynamically adaptable embedded systems},
  journal = j-TECS,
  volume = {12},
  number = {3},
  pages = {85:1--85:??},
  month = mar,
  year = {2013},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue May 28 17:38:27 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Significant research has demonstrated the performance and
    power benefits of runtime dynamic reconfiguration of FPGAs and
    microprocessor/FPGA devices. For dynamically reconfigurable systems, in
    which the selection of hardware coprocessors to implement within the
    FPGA is determined at runtime, online estimation methods are needed to
    evaluate the performance and power consumption impact of the hardware
    coprocessor selection. In this paper, we present a profile assisted
    online system-level performance and power estimation framework for
    estimating the speedup and power consumption of dynamically
    reconfigurable embedded systems. We evaluate the accuracy and fidelity
    of our online estimation framework for dynamic hardware kernel
    selection to maximize performance or minimize the system power
    consumption.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {85},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Jafari:2013:ISS,
  author = {Roozbeh Jafari and John Lach and Majid Sarrafzadeh and William
    Kaiser},
  title = {Introduction to the special section on wireless health systems},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {98:1--98:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485986},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {98},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Wijsman:2013:TME,
author = "Jacqueline Wijsman and Bernard Grundlehner and Julien
Penders and Hermie Hermens",
title = "Trapezius muscle {EMG} as predictor of mental stress",
journal = j-TECS,
volume = "12",
number = "4",
pages = "99:1--99:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2485984.2485987",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 1 18:28:35 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Stress is a growing problem in society and can cause
musculoskeletal complaints. It would be useful to
measure stress for prevention of stress-related health
problems. An experiment is described in which EMG
signals of the upper trapezius muscle were measured
with a wireless system during three different stressful
conditions: a calculation task (the Norinder test), a
logical puzzle task and a memory task. The latter two
tests were newly designed and aimed at creating
circumstances that are similar to work stress.
Amplitudes of the EMG signals were significantly higher
during stress compared to rest (+2.6\% of reference
contraction level) and relative time with EMG gaps was
lower during stress ($-$14.3\% of time). Also, mean and
median frequencies were significantly lower during
stress than during rest ($-$8.6 and $-$8.8 Hz,
respectively). EMG amplitude increased not only from
rest to stress conditions, but also during stressful
conditions and decreased during relaxation periods. EMG
features correlated with subjectively indicated stress
levels (correlations of 0.32 with RMS and $-$0.32 with
relative gaptime). The results indicate that EMG is a
useful parameter to detect stress. Together with other
physiological sensors, EMG sensors can be included in a
wireless system for ambulatory monitoring of stress
levels.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "99",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wouhaybi:2013:ECM,
  author = {Rita H. Wouhaybi and Mark D. Yarvis and Sangita Sharma and
    Philip Muse and Chieh-Yih Wan and Sai Prasad and Lenitra Durham and
    Ritu Sahni and Robert Norton and Merlin Curry and Holly Jimison and
    Richard Harper and Robert A. Lowe},
  title = {Experiences with context management in emergency medicine},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {100:1--100:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485988},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In emergency medicine, patient care is intense and stressful,
    often requiring paramedics to consult with remote physicians to convey
    the patient's condition. We present a framework for context-management
    in telemedicine developed in collaboration between engineers,
    physicians, and paramedics. We describe a mobile platform and embedded
    wireless sensors to capture physiological and audio context into a
    comprehensive patient record, accessible locally and remotely. We
    describe a first evaluation of this technology by trained paramedics in
    simulated scenarios and evaluate key aspects of system performance.
    Early results suggest that wireless sensing can provide reliable and
    low latency data both locally and to remote physicians. In addition,
    audio context capture is a promising approach to capturing a
    comprehensive patient record, with a low rate of medically important
    errors.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {100},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Valtazanos:2013:LSS,
  author = {Aris Valtazanos and D. K. Arvind and Subramanian Ramamoorthy},
  title = {Latent space segmentation for mobile gait analysis},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {101:1--101:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485989},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {An unsupervised learning algorithm is presented for
    segmentation and evaluation of motion data from the on-body Orient
    wireless motion capture system for mobile gait analysis. The algorithm
    is model-free and operates on the latent space of the motion, by first
    aggregating all the sensor data into a single vector, and then modeling
    them on a low-dimensional manifold to perform segmentation. The
    proposed approach is contrasted to a basic, model-based algorithm,
    which operates directly on the joint angles computed by the Orient
    sensor devices. The latent space algorithm is shown to be capable of
    retrieving qualitative features of the motion even in the face of noisy
    or incomplete sensor readings.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {101},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Masse:2013:MWE,
  author = {Fabien Mass{\'e} and Martien {Van Bussel} and Aline Serteyn and
    Johan Arends and Julien Penders},
  title = {Miniaturized wireless {ECG} monitor for real-time detection of
    epileptic seizures},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {102:1--102:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485990},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Recent advances in miniaturization of ultra-low power
    components allow for more intelligent wearable health monitors. The
    development and evaluation of a wireless wearable electrocardiogram
    (ECG) monitor to detect epileptic seizures from changes in the cardiac
    rhythm is described. The ECG data are analyzed by embedded algorithms:
    a robust beat-detection algorithm combined with a real-time epileptic
    seizure detector. In its current implementation, the proposed prototype
    is 52$ \times $ 36$ \times $ 15mm$^3$, and has an autonomy of one day.
    Based on data collected on the first three epilepsy patients,
    preliminary clinical results are provided. Wireless, miniaturized and
    comfortable, this prototype opens new perspectives for health
    monitoring.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {102},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Chi:2013:WNE,
author = "Yu M. Chi and Patrick Ng and Gert Cauwenberghs",
title = "Wireless noncontact {ECG} and {EEG} biopotential
sensors",
journal = j-TECS,
volume = "12",
number = "4",
pages = "103:1--103:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2485984.2485991",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 1 18:28:35 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Wearable, unobtrusive and patient friendly
physiological sensors will be a key driving force in
the wireless health revolution. Cardiac (ECG) and brain
(EEG) signals are two important signal modalities
indicative of healthy and diseased states of body and
mind that directly benefit from long-term monitoring.
Despite advancements in wireless and embedded
electronics technology, however, ECG/EEG monitoring
devices still face problems with patient compliance and
comfort from the use of wet/gel electrodes. We have
developed two wireless biopotential instrumentation
systems using noncontact electrodes that can operate
without direct skin contact and through thin layers of
fabric. The first system is a general purpose
replacement for traditional ECG/EEG telemetry systems
and the second is a compact, fully self-contained
wireless ECG tag. All of the issues relating to the
design of low noise, high performance noncontact
sensors are discussed along with full technical
details, circuit schematics and construction
techniques. The noncontact electrode has been
integrated into both a wearable ECG chest harness as
well as an EEG headband and characterized in a battery
of experiments that represent potential health
applications including resting ECG, exercise ECG and
EEG directly against standard clinical adhesive
Ag\slash AgCl electrodes. With careful design and
secure mechanical harnesses the noncontact sensor is
capable of approaching the quality of conventional
electrodes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "103",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cardo:2013:ISS,
author = "Jos{\'e} {Flich Cardo} and Maurizio Palesi",
title = "Introduction to the special section on on-chip and
off-chip network architectures",
journal = j-TECS,
volume = "12",
number = "4",
pages = "104:1--104:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2485984.2485992",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 1 18:28:35 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "104",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yu:2013:ANC,
  author = {Qiaoyan Yu and Meilin Zhang and Paul Ampadu},
  title = {Addressing network-on-chip router transient errors with inherent
    information redundancy},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {105:1--105:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485993},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We exploit the inherent information redundancy in the control
    path of Network-on-Chip (NoC) routers to manage transient errors,
    preventing packet loss and misrouting. Outputs of the routing
    arbitration units in NoC routers can be used to determine arbitration
    failures, because the valid arbitration outputs are a subset of all
    possible values. This feature is exploited to detect and correct logic
    and register errors in the router arbitration control path. The
    proposed method is complementary to other error management methods for
    NoC routers. An analytical reliability model of our method is provided,
    including parameters such as logic unit size, different error rates for
    logic gates and registers, and the location of faulty elements.
    Compared to triple-modular redundancy (TMR), the proposed method
    improves the arbiter reliability by two orders of magnitude while
    reducing the total area and power by 43\% and 64\%, respectively. In
    the presented case studies, two traffic traces from the PARSEC
    benchmark suite are used to evaluate the average latency and energy
    consumption. Simulations performed on a 4$ \times $ 4 NoC show that our
    method reduces the average latency by up to 50\% and reduces average
    energy by up to 70\% compared to other methods.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {105},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ghiribaldi:2013:CST,
author = "Alberto Ghiribaldi and Daniele Ludovici and Francisco
Trivi{\~n}o and Alessandro Strano and Jos{\'e} Flich
and Jos{\'e} Luis S{\'a}nchez and Francisco Alfaro and
Michele Favalli and Davide Bertozzi",
title = "A complete self-testing and self-configuring {NoC}
infrastructure for cost-effective {MPSoCs}",
journal = j-TECS,
volume = "12",
number = "4",
pages = "106:1--106:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2485984.2485994",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 1 18:28:35 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
note = "See comment \cite{Bishnoi:2015:BCC}.",
abstract = "Networks-on-chip need to survive to manufacturing
faults in order to sustain yield. An effective testing
and configuration strategy however implies two opposite
requirements. On one hand, a fast and scalable
built-in self-testing and self-diagnosis procedure has
to be carried out concurrently at NoC switches. On the
other hand, programming the NoC routing mechanism to go
around faulty links and switches can be optimally
performed by a centralized controller with global
network visibility. To the best of our knowledge, this
article proposes for the first time a global network
testing and configuration strategy that meets the
opposite requirements by means of a fault-tolerant dual
network architecture and a fast configuration algorithm
for the most common failure patterns. Experimental
results report an area overhead as low as 12.5\% with
respect to the baseline switch architecture while
achieving a high degree of fault tolerance. In fact,
even when multiple stuck-at faults are considered, the
capability of fault masking by the dual network is
always over 80\%, and the support for multiple link
failures is more than 90\% in presence of two unusable
links in the main network with minimum set-up times.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "106",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sem-Jacobsen:2013:ELC,
author = "Frank Olaf Sem-Jacobsen and Samuel Rodrigo and Tor
Skeie and Alessandro Strano and Davide Bertozzi",
title = "An efficient, low-cost routing framework for convex
mesh partitions to support virtualization",
journal = j-TECS,
volume = "12",
number = "4",
pages = "107:1--107:??",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2485984.2485995",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 1 18:28:35 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "At the core of an efficient chip multiprocessors (CMP)
is support for unicast and multicast routing, low
implementation costs, and the ability to isolate
concurrent applications with maximum utilization of the
CMP. We present an efficient logic-based unicast and
multicast routing algorithm that guarantees isolation
of local application traffic within any near-convex
region on the chip, and the algorithms to recognize
supported partitions and configure the cores
accordingly. Evaluations show that the routing
algorithm has a 57\% more compact
implementation than a recent multicast solution with
the same coverage, and it achieves 5\% higher
throughput with 13\% lower latency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "107",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Seiculescu:2013:DBE,
  author = {Ciprian Seiculescu and Dara Rahmati and Srinivasan Murali and
    Hamid Sarbazi-Azad and Luca Benini and Giovanni {De Micheli}},
  title = {Designing best effort networks-on-chip to meet hard latency
    constraints},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {108:1--108:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485996},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Many classes of applications require Quality of Service (QoS)
    guarantees from the system interconnect. In Networks-on-Chip (NoC) QoS
    guarantees usually translate into bandwidth and latency constraints for
    the traffic flows and require hardware support in the NoC fabric and
    its interfaces. In this article we present a novel NoC synthesis
    framework to automatically build networks that meet hard latency
    constraints of end-to-end traffic streams without requiring specialized
    hardware for the network components. The hard latency constraints are
    met by carefully designing the NoC topology and selecting the
    appropriate routes for flow using lean best-effort network components.
    We perform experiments on several System on Chip (SoC) benchmarks. We
    compared against a topology synthesis method with no support for
    real-time constraints and we show that the proposed method can produce
    topologies that can meet significantly tighter worst case latency
    constraints (on average 44\%). We also show that the tightest worst
    case latency can be provided with little overhead on power consumption
    (on average 8.5\%).},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {108},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Zahavi:2013:GNL,
  author = {Eitan Zahavi and Israel Cidon and Avinoam Kolodny},
  title = {{Gana}: a novel low-cost conflict-free {NoC} architecture},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {109:1--109:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485997},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Similar to off-chip networks, current NoC architectures are
    based on the store and forward of uncoordinated end-to-end packet
    transmissions through autonomous buffered routers. However, the
    monolithic nature and the small physical dimensions of on chip networks
    open up the opportunity for much more tightly controlled architectures.
    We present GANA, a new Global Arbiter NoC Architecture. In GANA, the
    transmission of end-to-end data is timed by a global arbiter in a way
    that avoids any queuing in the network. The arbitration takes into
    account the complete transfer of the end-to-end packets through the
    entire network path, avoiding any intermediate queuing and hop-by-hop
    packet arbitration. Consequently, buffers and arbiters are no longer
    required in the routers, resulting in smaller area and low power
    consumption. It is demonstrated through detailed design and synthesis
    that the additional area of the central arbiter and the control path
    are negligible in comparison to the provided area saving. For example,
    an 8$ \times $ 8 GANA consumes only 16\% of the area of an equivalent
    autonomous NoC while providing a better end-to-end throughput. The
    end-to-end performance of GANA at high network loads is typically much
    better than in a distributed-control NOC, because resource contention
    and queuing in the network are avoided. This comes at the cost of a few
    percentage increase in latency at light loads due to the additional
    arbitration phase. GANA architecture combines the inherent benefits of
    a network (parallelism and spatial reuse of links) with the inherent
    benefits of high integration (global view of the system state, central
    control, and synchronization). The scalability of GANA is evaluated
    analytically, showing that it can be superior to fully-distributed
    networks in systems up to a size of about 100 modules manufactured in
    45nm technology, which can be used today as well as in the foreseeable
    future.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {109},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Kim:2013:NCA,
  author = {Dongki Kim and Sungjoo Yoo and Sunggu Lee},
  title = {A network congestion-aware memory subsystem for manycore},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {110:1--110:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485998},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The network-on-chip (NoC) plays a crucial role in memory
    performance due to the fact that it can handle the majority of traffics
    from/to the DRAM memory controllers. However, there has been little
    work on the interplay between the NoC and memory controllers. In this
    article, we address a problem called network congestion-induced memory
    blocking and propose a novel memory controller, which performs memory
    access scheduling and network entry control in a network
    congestion-aware manner. In case of network congestion, in order to
    avoid performance degradation due to the blocking caused by data bound
    for congested regions in the NoC, the proposed memory controller favors
    requests and data associated with uncongested regions. In addition, in
    order to avoid the fairness problem of such a policy, we also propose a
    gradual method, which enables a trade-off between performance (in
    memory utilization) and fairness (in memory access latency).
    Experimental results show that the proposed method can offer up to 1.76
    to 2.99 times improvement in memory utilization in the latency-tolerant
    designs.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {110},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Sem-Jacobsen:2013:EPE,
  author = {Frank Olaf Sem-Jacobsen and Samuel Rodrigo and Alessandro
    Strano and Tor Skeie and Davide Bertozzi and Francisco Gilabert},
  title = {Enabling power efficiency through dynamic rerouting on-chip},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {111:1--111:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2485999},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Networks-on-chip (NoCs) are key components in many-core chip
    designs. Dynamic power-awareness is a new challenge present in NoCs
    that must be efficiently handled by the routing functionality as it
    introduces irregularities in the commonly used 2-D meshes. In this
    article, we propose a logic-based routing algorithm, iFDOR, oriented
    towards dynamic powering down one region within every application
    partition on the chip through dynamic rerouting, with low
    implementation costs. Results show that we can successfully shutdown an
    arbitrary rectangular region within an application partition without
    significant impact on network performance.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {111},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Anonymous:2013:AOS,
  author = {Anonymous},
  title = {Abstracts: Online Supplements Volume 12, Number 1s, Volume 12,
    Number 2s},
  journal = j-TECS,
  volume = {12},
  number = {4},
  pages = {112:1--112:??},
  month = jun,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2485984.2499550},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {112},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Li:2013:SDM,
  author = {Mo Li and Zheng Yang and Yunhao Liu},
  title = {Sea depth measurement with restricted floating sensors},
  journal = j-TECS,
  volume = {13},
  number = {1},
  pages = {1:1--1:??},
  month = aug,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2512448},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Sea depth monitoring is a critical task for ensuring safe
    operation of harbors. Traditional schemes largely rely on
    labor-intensive work and expensive hardware. This study explores the
    possibility of deploying networked sensors on the surface of the sea,
    measuring and reporting the sea depth of given areas. We propose a
    Restricted Floating Sensors (RFS) model in which sensor nodes are
    anchored to the sea bottom, floating within a restricted area.
    Distinguished from traditional stationary or mobile sensor networks,
    the RFS network consists of sensor nodes with restricted mobility. We
    construct the network model and elaborate the corresponding
    localization problem. We show that by locating such RFS sensors, the
    sea depth can be estimated without the help of any extra ranging
    devices. A prototype system with 25 Telos sensor nodes is deployed to
    validate this design. We also examine the efficiency and scalability of
    this design through large-scale simulations.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {1},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Anand:2013:CCS,
  author = {Madhukar Anand and Sebastian Fischmeister and Insup Lee},
  title = {A comparison of compositional schedulability analysis techniques
    for hierarchical real-time systems},
  journal = j-TECS,
  volume = {13},
  number = {1},
  pages = {2:1--2:??},
  month = aug,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2501626.2501629},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Schedulability analysis of hierarchical real-time embedded
    systems involves defining interfaces that represent the underlying
    system faithfully and then compositionally analyzing those interfaces.
    Whereas commonly used abstractions, such as periodic and sporadic tasks
    and their interfaces, are simple and well studied, results for more
    complex and expressive abstractions and interfaces based on task graphs
    and automata are limited. One contributory factor may be the hardness
    of compositional schedulability analysis with task graphs and automata.
    Recently, conditional task models, such as the recurring branching task
    model, have been introduced with the goal of reaching a middle ground
    in the trade-off between expressivity and ease of analysis.
    Consequently, techniques for compositional analysis with conditional
    models have also been proposed, and each offer different advantages. In
    this work, we revisit those techniques, compare their advantages using
    an automotive case study, and identify limitations that would need to
    be addressed before adopting these techniques for use with real-world
    problems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {2},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{MartinezSantos:2013:LSA,
author = "Juan Carlos {Martinez Santos} and Yunsi Fei",
title = "Leveraging speculative architectures for runtime
program validation",
journal = j-TECS,
volume = "13",
number = "1",
pages = "3:1--3:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512456",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Program execution can be tampered with by malicious
attackers through exploiting software vulnerabilities.
Changing the program behavior by compromising control
data and decision data has become the most serious
threat in computer system security. Although several
hardware approaches have been presented to validate
program execution, they either incur great hardware
overhead or introduce false alarms. We propose a new
hardware-based approach by leveraging the existing
speculative architectures for runtime program
validation. The on-chip branch target buffer (BTB) is
utilized as a cache of the legitimate control flow
transfers stored in a secure memory region. In
addition, the BTB is extended to store the correct
program path information. At each indirect branch site,
the BTB is used to validate the decision history of
previous conditional branches and monitor the following
execution path at runtime. Implementation of this
approach is transparent to the upper operating system
and programs. Thus, it is applicable to legacy code.
Because of good code locality of the executable
programs and effectiveness of branch prediction, the
frequency of control-flow validations against the
secure off-chip memory is low. Our experimental results
show a negligible performance penalty and small storage
overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hsieh:2013:TAM,
author = "Ang-Chih Hsieh and Tingting Hwang",
title = "Thermal-aware memory mapping in {$3$D} designs",
journal = j-TECS,
volume = "13",
number = "1",
pages = "4:1--4:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512457",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "DRAM is usually used as main memory for program
execution. The thermal behavior of a memory block in a
3D SIP is affected not only by the power behavior but
also the heat dissipating ability of that block. The
power behavior of a block is related to the
applications run on the system, while the heat
dissipating ability is determined by the number of tier
and the position the block locates. Therefore, a
thermal-aware memory allocator should consider the
following two points. First, the allocator should
consider not only the power behavior of a logic block
but also the physical location during memory mapping
and second, the changing temperature of a physical
block during execution of programs. In this article, we
will propose a memory mapping algorithm taking into
consideration these two points. Our technique can be
classified as static thermal management to be applied
to embedded software designs. Experiments show that for
single-core systems, our method can reduce the
temperature of memory system by 17.1${}^\circ $C, as
compared to a straightforward mapping in the best case,
and 13.3${}^\circ $C on average. For systems with four
cores, the temperature reductions are 9.9${}^\circ $C
and 11.6${}^\circ $C on average when L1 cache of each
core is set to 4KB and 8KB, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bai:2013:SOS,
author = "Ke Bai and Aviral Shrivastava",
title = "A software-only scheme for managing heap data on
limited local memory ({LLM}) multicore processors",
journal = j-TECS,
volume = "13",
number = "1",
pages = "5:1--5:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501626.2501632",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a scheme for managing heap data
in the local memory present in each core of a limited
local memory (LLM) multicore architecture. Although
managing heap data semi-automatically with software
cache is feasible, it may require modifications of
other thread codes. Crossthread modifications are very
difficult to code and debug, and will become more
complex and challenging as we increase the number of
cores. In this article, we propose an intuitive
programming interface, which is an automatic and
scalable scheme for heap data management. Besides, for
embedded applications, where the maximum heap size can
be profiled, we propose several optimizations on our
heap management to significantly decrease the library
overheads. Our experiments on several benchmarks from
MiBench executing on the Sony Playstation 3 show that
our scheme is natural to use, and if we know the
maximum size of heap data, our optimizations can
improve application performance by an average of
14\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gu:2013:DDL,
author = "Ji Gu and Hui Guo and Tohru Ishihara",
title = "{DLIC}: Decoded loop instructions caching for
energy-aware embedded processors",
journal = j-TECS,
volume = "13",
number = "1",
pages = "6:1--6:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512464",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With the explosive proliferation of embedded systems,
especially through countless portable devices and
wireless equipment used, embedded systems have become
indispensable to the modern society and people's life.
Those devices are often battery driven. Therefore, low
energy consumption in embedded processors is important
and becomes critical in step with the system
complexity. The on-chip instruction cache (I-cache) is
usually the most energy-consuming component on the
processor chip due to its large size and frequent
access operations. To reduce such energy consumption,
the existing loop cache approaches use a tiny decoded
cache to filter the I-cache access and instruction
decode activity for repeated loop iterations. However,
such designs are effective for small and simple loops,
and only suitable for DSP kernel-like applications.
They are not effectual for many embedded applications
where complex loops are common. In this article, we
propose a decoded loop instruction cache (DLIC) that is
small, hence energy efficient, yet can capture most
loops, including large nested ones with branch
executions, so that a significant amount of I-cache
accesses and instruction decoding can be eradicated.
The experiments on a set of embedded benchmarks show
that our proposed DLIC scheme can reduce energy
consumption by up to 87\% as compared to normal
cache-only design. On average, 66\% energy can be saved
on instruction fetching and decoding, while at a
performance overhead of only 1.4\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Stanley-Marbell:2013:LPP,
author = "Phillip Stanley-Marbell",
title = "{L24}: Parallelism, performance, energy efficiency,
and cost trade-offs in future sensor platforms",
journal = j-TECS,
volume = "13",
number = "1",
pages = "7:1--7:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512465",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Networks of sensors must process large amounts of
intermittently-available data in situ. This motivates
the investigation of means for achieving high
performance when required, but ultra-low-power
dissipation when idle. One approach to this challenge
is the use of embedded multiprocessor systems, leading
to trade-offs between parallelism, performance, energy
efficiency, and cost. To evaluate these trade-offs and
to gain insight for future system designs, this article
presents the design, implementation, and evaluation of
a miniature, energy-scalable, 24-processor module, L24,
for use in embedded sensor systems. Analytic results
and empirical evidence motivating such embedded
multiprocessors is provided, and a parallel fixed-point
fast Fourier transform implementation is presented.
This application is used as a challenging but realistic
evaluator of the presented hardware platform. Through a
combination of hardware measurements, instruction-level
microarchitectural simulation, and analytic modeling,
it is demonstrated that the platform provides idle
power dissipation over an order of magnitude lower than
systems employing a monolithic processor of equivalent
performance, while dynamic power dissipation remains
competitive. Taking into account both application
computation and interprocessor communication demands,
it is shown that there may exist an optimum operating
voltage that minimizes either time-to-solution, energy
usage, or the energy-delay product. This optimum
operating point is formulated analytically, calibrated
with system measurements, and evaluated for the
hardware platform and application presented.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{So:2013:STI,
author = "Won So and Alexander G. Dean",
title = "Software thread integration for instruction-level
parallelism",
journal = j-TECS,
volume = "13",
number = "1",
pages = "8:1--8:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512466",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Multimedia applications require a significantly higher
level of performance than previous workloads of
embedded systems. They have driven digital signal
processor (DSP) makers to adopt high-performance
architectures like VLIW (Very-Long Instruction Word).
Despite many efforts to exploit instruction-level
parallelism (ILP) in the application, the speed is a
fraction of what it could be, limited by the difficulty
of finding enough independent instructions to keep all
of the processor's functional units busy. This article
proposes Software Thread Integration (STI) for
instruction-level parallelism. STI is a software
technique for interleaving multiple threads of control
into a single implicitly multithreaded one. We use STI
to improve the performance on ILP processors by merging
parallel procedures into one, increasing the compiler's
scope and hence allowing it to create a more efficient
instruction schedule. Assuming the parallel procedures
are given, we define a methodology for finding the best
performing integrated procedure with a minimum
compilation time. We quantitatively estimate the
performance impact of integration, allowing various
integration scenarios to be compared and ranked via
profitability analysis. During integration of threads,
different ILP-improving code transformations are
selectively applied according to the control structure
and the ILP characteristics of the code, driven by
interactions with software pipelining. The estimated
profitability is verified and corrected by an iterative
compilation approach, compensating for possible
estimation inaccuracy. Our modeling methods combined
with limited compilation quickly find the best
integration scenario without requiring exhaustive
integration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ghasemzadeh:2013:ULP,
author = "Hassan Ghasemzadeh and Roozbeh Jafari",
title = "Ultra low-power signal processing in wearable
monitoring systems: a tiered screening architecture
with optimal bit resolution",
journal = j-TECS,
volume = "13",
number = "1",
pages = "9:1--9:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501626.2501636",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Advances in technology have led to the development of
wearable sensing, computing, and communication devices
that can be woven into the physical environment of our
daily lives, enabling a large variety of new
applications in several domains, including wellness and
health care. Despite their tremendous potential to
impact our lives, wearable health monitoring systems
face a number of hurdles to become a reality. The
enabling processors and architectures demand a large
amount of energy, requiring sizable batteries. In this
article, we propose a granular decision-making
architecture for physical movement monitoring
applications. The module can be viewed as a tiered
wake-up circuitry. This decision-making module, in
combination with a low-power microcontroller, allows
for significant power saving through an ultra low-power
processing architecture. The significant power saving
is achieved by performing a preliminary ultra low-power
signal processing, and hence, keeping the
microcontroller off when the incoming signal is not of
interest. The preliminary signal processing is
performed by a set of special-purpose functional units,
also called screening blocks, that implement template
matching functions. We formulate and solve an
optimization problem for selecting screening blocks
such that the accuracy requirements of the signal
processing are accommodated while the total power is
minimized. Our experimental results on real data from
wearable motion sensors show that the proposed
algorithm achieves 63.2\% energy saving while
maintaining a sensitivity of 94.3\% in recognizing
transitional actions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2013:RED,
author = "Yuan-Hao Chang and Ming-Chang Yang and Tei-Wei Kuo and
Ren-Hung Hwang",
title = "A reliability enhancement design under the flash
translation layer for {MLC}-based flash-memory storage
systems",
journal = j-TECS,
volume = "13",
number = "1",
pages = "10:1--10:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512467",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Although flash memory has gained very strong momentum
in the storage market, the reliability of flash-memory
chips has been dropped significantly in the past years.
This article presents a reliability enhancement design
under the flash management layer (i.e., flash
translation layer) to address this concern so as to
reduce the design complexity of flash-memory management
software/firmware and to improve the maintainability
and portability of existing and future products. In
particular, a log-based write strategy with a
hash-based caching policy is proposed to provide extra
ECC redundancy and performance improvement. Strategies
for bad block management are also presented. The
failure rate of flash-memory storage systems is
analyzed with the considerations of bit errors. The
proposed design is later evaluated by a series of
experiments based on realistic traces. It was shown
that the proposed approach could significantly improve
the reliability of flash memory with very limited
system overheads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "10",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chao:2013:TLA,
author = "Chih-Hao Chao and Kun-Chih Chen and Tsu-Chu Yin and
Shu-Yen Lin and An-Yeu (Andy) Wu",
title = "Transport-layer-assisted routing for runtime thermal
management of {$3$D} {NoC} systems",
journal = j-TECS,
volume = "13",
number = "1",
pages = "11:1--11:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512468",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "To ensure thermal safety and to avoid performance
degradation from temperature regulation in 3D NoC, we
propose a new temperature-traffic control framework.
The framework contains the vertical throttling-based
runtime thermal management (VT-RTM) scheme and the
transport-layer assisted routing (TLAR) scheme. VT-RTM
scheme increases the cooling speed and maintains high
availability. TLAR scheme sustains the throughput of
the nonstationary irregular mesh network. In our
experiments, VT-RTM scheme reduces cooling time by 84\%
and achieves 98\% network availability; the overall
performance impact is around 8\% of traditional
schemes. TLAR scheme reduces average latency by 35\%
and improves sustainable throughput by 76\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kent:2013:CPS,
author = "Christopher G. Kent and Joann M. Paul",
title = "Contextual partitioning for speech recognition",
journal = j-TECS,
volume = "13",
number = "1",
pages = "12:1--12:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501626.2501639",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many multicore computers are single-user devices,
creating the potential to partition by situational
usage contexts, similar to how the human brain is
organized. Contextual partitioning (CP) permits
multiple simplified versions of the same task to exist
in parallel, with selection tied to the context in use.
We introduce CP for speech recognition, specifically
targeted at user interfaces in handheld embedded
devices. Contexts are drawn from webpage interactions.
CP results in 61\% fewer decoding errors, 97\% less
training for vocabulary changes, near-linear scaling
potential with increasing core counts, and up to a
potential 90\% reduction in power usage.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2013:DER,
author = "Sunwoo Kim and Won Seob Jeong and Won W. Ro and
Jean-Luc Gaudiot",
title = "Design and evaluation of random linear network coding
accelerators on {FPGAs}",
journal = j-TECS,
volume = "13",
number = "1",
pages = "13:1--13:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512469",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Network coding is a well-known technique used to
enhance network throughput and reliability by applying
special coding to data packets. One critical problem in
practice, when using the random linear network coding
technique, is the high computational overhead. More
specifically, using this technique in embedded systems
with low computational power might cause serious delays
due to the complex Galois field operations and matrix
handling. To this end, this article proposes a
high-performance decoding logic for random linear
network coding using field-programmable gate-array
(FPGA) technology. We expect that the inherent
reconfigurability of FPGAs will provide sufficient
performance as well as programmability to cope with
changes in the specification of the coding. The main
design motivation was to improve the decoding delay by
dividing and parallelizing the entire decoding process.
Fast arithmetic operations are achieved by the proposed
parallelized GF ALUs, which allow calculations with all
the elements of a single row of a matrix to be
performed concurrently. To improve the flexibility in
the utilization of the FPGA components, two different
decoding methods have been designed and compared. The
performance of the proposed idea is evaluated by
comparing with the performance of the decoding process
executed by general-purpose processors through an
equivalent software algorithm. Overall, a maximum
throughput of 65.98 Mbps is achieved with the proposed
FPGA design on an XC5VLX110T Virtex 5 device. In
addition, the proposed design provides speedups of up
to 13.84 compared to an aggressively parallelized
software decoding algorithm run on a quad-core AMD
processor. Moreover, the design affords 12 times higher
power efficiency in terms of throughput per watt than
an ARM Cortex-A9 processor.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Beg:2013:CPA,
author = "Mirza Beg and Peter van Beek",
title = "A constraint programming approach for integrated
spatial and temporal scheduling for clustered
architectures",
journal = j-TECS,
volume = "13",
number = "1",
pages = "14:1--14:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512470",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 5 19:03:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many embedded processors use clustering to scale up
instruction-level parallelism in a cost-effective
manner. In a clustered architecture, the registers and
functional units are partitioned into smaller units and
clusters communicate through register-to-register copy
operations. Texas Instruments, for example, has a
series of architectures for embedded processors which
are clustered. Such an architecture places a heavier
burden on the compiler, which must now assign
instructions to clusters (spatial scheduling), assign
instructions to cycles (temporal scheduling), and
schedule copy operations to move data between clusters.
We consider instruction scheduling of local blocks of
code on clustered architectures to improve performance.
Scheduling for space and time is known to be a hard
problem. Previous work has proposed greedy approaches
based on list scheduling to simultaneously perform
spatial and temporal scheduling and phased approaches
based on first partitioning a block of code to do
spatial assignment and then performing temporal
scheduling. Greedy approaches risk making mistakes that
are then costly to recover from, and partitioning
approaches suffer from the well-known phase ordering
problem. In this article, we present a constraint
programming approach for scheduling instructions on
clustered architectures. We employ a problem
decomposition technique that solves spatial and
temporal scheduling in an integrated manner. We analyze
the effect of different hardware parameters---such as the
number of clusters, issue-width, and intercluster
communication cost---on application performance. We found
that our approach was able to achieve an improvement of
up to 26\%, on average, over a state-of-the-art
technique on superblocks from SPEC 2000 benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Brisk:2013:ISI,
author = "Philip Brisk and Tulika Mitra",
title = "Introduction to the special issue on
application-specific processors",
journal = j-TECS,
volume = "13",
number = "2",
pages = "15:1--15:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2514641.2514642",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Sep 27 18:13:13 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Vyas:2013:HAS,
author = "Sudhanshu Vyas and Adwait Gupte and Christopher D.
Gill and Ron K. Cytron and Joseph Zambreno and Phillip
H. Jones",
title = "Hardware architectural support for control systems and
sensor processing",
journal = j-TECS,
volume = "13",
number = "2",
pages = "16:1--16:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2514641.2514643",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Sep 27 18:13:13 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The field of modern control theory and the systems
used to implement these controls have shown rapid
development over the last 50 years. It was often the
case that those developing control algorithms could
assume the computing medium was solely dedicated to the
task of controlling a plant, for example, the control
algorithm being implemented in software on a dedicated
Digital Signal Processor (DSP), or implemented in
hardware using a simple dedicated Programmable Logic
Device (PLD). As time progressed, the drive to place
more system functionality in a single component
(reducing power, cost, and increasing reliability) has
made this assumption less often true. Thus, it has been
pointed out by some experts in the field of control
theory (e.g., {\AA}str{\"o}m) that those developing control
algorithms must take into account the effects of
running their algorithms on systems that will be shared
with other tasks. One aspect of the work presented in
this article is a hardware architecture that allows
control developers to maintain this simplifying
assumption. We focus specifically on the
Proportional-Integral-Derivative (PID) controller. An
on-chip coprocessor has been implemented that can scale
to support servicing hundreds of plants, while
maintaining microsecond-level response times, tight
deterministic control loop timing, and allowing the
main processor to service noncontrol tasks. In order to
control a plant, the controller needs information about
the plant's state. Typically this information is
obtained from sensors with which the plant has been
instrumented. There are a number of common computations
that may be performed on this sensor data before being
presented to the controller (e.g., averaging and
thresholding). Thus in addition to supporting PID
algorithms, we have developed a Sensor Processing Unit
(SPU) that off-loads these common sensor processing
tasks from the main processor. We have prototyped our
ideas using Field Programmable Gate Array (FPGA)
technology. Through our experimental results, we show
our PID execution unit gives orders of magnitude
improvement in response time when servicing many
plants, as compared to a standard general software
implementation. We also show that the SPU scales much
better than a general software implementation. In
addition, these execution units allow the simplifying
assumption of dedicated computing medium to hold for
control algorithm development.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Beldianu:2013:MBV,
author = "Spiridon F. Beldianu and Sotirios G. Ziavras",
title = "Multicore-based vector coprocessor sharing for
performance and energy gains",
journal = j-TECS,
volume = "13",
number = "2",
pages = "17:1--17:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2514641.2514644",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Sep 27 18:13:13 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "For most of the applications that make use of a
dedicated vector coprocessor, its resources are not
highly utilized due to the lack of sustained data
parallelism which often occurs due to vector-length
variations in dynamic environments. The motivation of
our work stems from: (a) the mandate for multicore
designs to make efficient use of on-chip resources for
low power and high performance; (b) the omnipresence of
vector operations in high-performance scientific and
emerging embedded applications; (c) the need to often
handle a variety of vector sizes; and (d) vector
kernels in application suites may have diverse
computation needs. We present a robust design framework
for vector coprocessor sharing in multicore
environments that maximizes vector unit utilization and
performance at substantially reduced energy costs. For
our adaptive vector unit, which is attached to multiple
cores, we propose three basic shared working policies
that enforce coarse-grain, fine-grain, and vector-lane
sharing. We benchmark these vector coprocessor sharing
policies for a dual-core system and evaluate them using
the floating-point performance, resource utilization,
and power/energy consumption metrics. Benchmarking for
FIR filtering, FFT, matrix multiplication, and LU
factorization shows that these coprocessor sharing
policies yield high utilization and performance with
low energy costs. The proposed policies provide 1.2--2
speedups and reduce the energy needs by about 50\% as
compared to a system having a single core with an
attached vector coprocessor. With the performance
expressed in clock cycles, the sharing policies
demonstrate 3.62--7.92 speedups compared to optimized
Xeon runs. We also introduce performance and empirical
power models that can be used by the runtime system to
estimate the effectiveness of each policy in a hybrid
system that can simultaneously implement this suite of
shared coprocessor policies.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jungeblut:2013:SAO,
  author          = "Thorsten Jungeblut and Boris H{\"u}bener and Mario
                     Porrmann and Ulrich R{\"u}ckert",
  title           = "A systematic approach for optimized bypass
                     configurations for application-specific embedded
                     processors",
  journal         = j-TECS,
  volume          = "13",
  number          = "2",
  pages           = "18:1--18:??",
  month           = sep,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2514641.2514645",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The diversity of today's mobile applications requires
                     embedded processor cores with a high resource
                     efficiency, that means, the devices should provide a
                     high performance at low area requirements and power
                     consumption. The fine-grained parallelism supported by
                     multiple functional units of VLIW architectures offers
                     a high throughput at reasonable low clock frequencies
                     compared to single-core RISC processors. To efficiently
                     utilize the processor pipeline, common system
                     architectures have to cope with data hazards due to
                     data dependencies between consecutive operations. On
                     the one hand, such hazards can be resolved by complex
                     forwarding circuits (i.e., a pipeline bypass) which
                     forward intermediate results to a subsequent
                     instruction. On the other hand, the pipeline bypass can
                     strongly affect or even dominate the total resource
                     requirements and degrade the maximum clock frequency.
                     In this work the CoreVA VLIW architecture is used for
                     the development and the analysis of
                     application-specific bypass configurations. It is shown
                     that many paths of a comprehensive bypass system are
                     rarely used and may not be required for certain
                     applications. For this reason, several strategies have
                     been implemented to enhance the efficiency of the total
                     system by introducing application-specific bypass
                     configurations. The configuration can be carried out
                     statically by only implementing required paths or at
                     runtime by dynamically reconfiguring the hardware. An
                     algorithm is proposed which derives an optimized
                     configuration by iteratively disabling single bypass
                     paths. The adaptation of these application-specific
                     bypass configurations allows for a reduction of the
                     critical path by 26\%. As a result, the execution time
                     and energy requirements could be reduced by up to
                     21.5\%. Using Dynamic Frequency Scaling (DFS) and
                     dynamic deactivation/reactivation of bypass paths
                     allows for a runtime reconfiguration of the bypass
                     system. This ensures the highest efficiency while
                     processing varying applications.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "18",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Theodoropoulos:2013:CAM,
  author          = "Dimitris Theodoropoulos and Georgi Kuzmanov and Georgi
                     Gaydadjiev",
  title           = "Custom architecture for multicore audio beamforming
                     systems",
  journal         = j-TECS,
  volume          = "13",
  number          = "2",
  pages           = "19:1--19:??",
  month           = sep,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2514641.2514646",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The audio Beamforming (BF) technique utilizes
                     microphone arrays to extract acoustic sources recorded
                     in a noisy environment. In this article, we propose a
                     new approach for rapid development of multicore BF
                     systems. Research on literature reveals that the
                     majority of such experimental and commercial audio
                     systems are based on desktop PCs, due to their
                     high-level programming support and potential of rapid
                     system development. However, these approaches introduce
                     performance bottlenecks, excessive power consumption,
                     and increased overall cost. Systems based on DSPs
                     require very low power, but their performance is still
                     limited. Custom hardware solutions alleviate the
                     aforementioned drawbacks, however, designers primarily
                     focus on performance optimization without providing a
                     high-level interface for system control and test. In
                     order to address the aforementioned problems, we
                     propose a custom platform-independent architecture for
                     reconfigurable audio BF systems. To evaluate our
                     proposal, we implement our architecture as a
                     heterogeneous multicore reconfigurable processor and
                     map it onto FPGAs. Our approach combines the software
                     flexibility of General-Purpose Processors (GPPs) with
                     the computational power of multicore platforms. In
                     order to evaluate our system we compare it against a BF
                     software application implemented to a low-power Atom
                     330, a middle-ranged Core2 Duo, and a high-end Core i3.
                     Experimental results suggest that our proposed solution
                     can extract up to 16 audio sources in real time under a
                     16-microphone setup. In contrast, under the same setup,
                     the Atom 330 cannot extract any audio sources in real
                     time, while the Core2 Duo and the Core i3 can process
                     in real time only up to 4 and 6 sources respectively.
                     Furthermore, a Virtex4-based BF system consumes more
                     than an order less energy compared to the
                     aforementioned GPP-based approaches.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "19",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Mariani:2013:DSE,
  author          = "Giovanni Mariani and Gianluca Palermo and Vittorio
                     Zaccaria and Cristina Silvano",
  title           = "Design-space exploration and runtime resource
                     management for multicores",
  journal         = j-TECS,
  volume          = "13",
  number          = "2",
  pages           = "20:1--20:??",
  month           = sep,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2514641.2514647",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Application-specific multicore architectures are
                     usually designed by using a configurable platform in
                     which a set of parameters can be tuned to find the best
                     trade-off in terms of the selected figures of merit
                     (such as energy, delay, and area). This multi-objective
                     optimization phase is called Design-Space Exploration
                     (DSE). Among the design-time (hardware) configurable
                     parameters we can find the memory subsystem
                     configuration (such as cache size and associativity)
                     and other architectural parameters such as the
                     instruction-level parallelism of the system processors.
                     Among the runtime (software) configurable parameters we
                     can find the degree of task-level parallelism
                     associated with each application running on the
                     platform. The contribution of this article is twofold;
                     first, we introduce an evolutionary (NSGA-II-based)
                     methodology for identifying a hardware configuration
                     which is robust with respect to applications and
                     corresponding datasets. Second, we introduce a novel
                     runtime heuristic that exploits design-time identified
                     operating points to provide guaranteed throughput to
                     each application. Experimental results show that the
                     design-time/runtime combined approach improves the
                     runtime performance of the system with respect to
                     existing reference techniques, while meeting the
                     overall power budget.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "20",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2013:MPE,
  author          = "Yooseong Kim and Aviral Shrivastava",
  title           = "Memory performance estimation of {CUDA} programs",
  journal         = j-TECS,
  volume          = "13",
  number          = "2",
  pages           = "21:1--21:??",
  month           = sep,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2514641.2514648",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "CUDA has successfully popularized GPU computing, and
                     GPGPU applications are now used in various embedded
                     systems. The CUDA programming model provides a simple
                     interface to program on GPUs, but tuning GPGPU
                     applications for high performance is still quite
                     challenging. Programmers need to consider numerous
                     architectural details, and small changes in source
                     code, especially on the memory access pattern, can
                     affect performance significantly. This makes it very
                     difficult to optimize CUDA programs. This article
                     presents CuMAPz, which is a tool to analyze and compare
                     the memory performance of CUDA programs. CuMAPz can
                     help programmers explore different ways of using shared
                     and global memories, and optimize their program for
                     efficient memory behavior. CuMAPz models several
                     memory-performance-related factors: data reuse, global
                     memory access coalescing, global memory latency hiding,
                     shared memory bank conflict, channel skew, and branch
                     divergence. Experimental results show that CuMAPz can
                     accurately estimate performance with correlation
                     coefficient of 0.96. By using CuMAPz to explore the
                     memory access design space, we could improve the
                     performance of our benchmarks by 30\% more than the
                     previous approach [Hong and Kim 2010].",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "21",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Stamoulias:2013:PAK,
  author          = "Ioannis Stamoulias and Elias S. Manolakos",
  title           = "Parallel architectures for the {kNN} classifier ---
                     design of soft {IP} cores and {FPGA} implementations",
  journal         = j-TECS,
  volume          = "13",
  number          = "2",
  pages           = "22:1--22:??",
  month           = sep,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2514641.2514649",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "We designed a variety of k-nearest-neighbor parallel
                     architectures for FPGAs in the form of parameterizable
                     soft IP cores. We show that they can be used to solve
                     large classification problems with thousands of
                     training vectors, or thousands of vector dimensions
                     using a single FPGA, and achieve very high throughput.
                     They can be used to flexibly synthesize architectures
                     that also cover: 1NN classification (vector
                     quantization), multishot queries (with different $k$),
                     LOOCV cross-validation, and compare favorably to GPU
                     implementations. To the best of our knowledge this is
                     the first attempt to design flexible IP cores for the
                     popular kNN classifier.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "22",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2013:ASP,
  author          = "Chen Huang and Frank Vahid and Tony Givargis",
  title           = "Automatic synthesis of physical system differential
                     equation models to a custom network of general
                     processing elements on {FPGAs}",
  journal         = j-TECS,
  volume          = "13",
  number          = "2",
  pages           = "23:1--23:??",
  month           = sep,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2514641.2514650",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Fast execution of physical system models has various
                     uses, such as simulating physical phenomena or
                     real-time testing of medical equipment. Physical system
                     models commonly consist of thousands of differential
                     equations. Solving such equations using software on
                     microprocessor devices may be slow. Several past
                     efforts implement such models as parallel circuits on
                     special computing devices called Field-Programmable
                     Gate Arrays (FPGAs), demonstrating large speedups due
                     to the excellent match between the massive fine-grained
                     local communication parallelism common in physical
                     models and the fine-grained parallel compute elements
                     and local connectivity of FPGAs. However, past
                     implementation efforts were mostly manual or ad hoc. We
                     present the first method for automatically converting a
                     set of ordinary differential equations into circuits on
                     FPGAs. The method uses a general Processing Element
                     (PE) that we developed, designed to quickly solve a set
                     of ordinary differential equations while using few FPGA
                     resources. The method instantiates a network of general
                     PEs, partitions equations among the PEs to minimize
                     communication, generates each PE's custom program,
                     creates custom connections among PEs, and maintains
                     synchronization of all PEs in the network. Our
                     experiments show that the method generates a 400-PE
                     network on a commercial FPGA that executes four
                     different models on average $ 15 \times $ faster than a
                     3 GHz Intel processor, $ 30 \times $ faster than a
                     commercial 4-core ARM, $ 14 \times $ faster than a
                     commercial 6-core Texas Instruments digital signal
                     processor, and $ 4.4 \times $ faster than an NVIDIA
                     336-core graphics processing unit. We also show that
                     the FPGA-based approach is reasonably cost effective
                     compared to using the other platforms. The method
                     yields $ 2.1 \times $ faster circuits than a commercial
                     high-level synthesis tool that uses the traditional
                     method for converting behavior to circuits, while using
                     $ 2 \times $ fewer lookup tables, $ 2 \times $ fewer
                     hardcore multiplier (DSP) units, though $ 3.5 \times $
                     more block RAM due to being programmable. Furthermore,
                     the method does not just generate a single fastest
                     design, but generates a range of designs that trade off
                     size and performance, by using different numbers of
                     PEs.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "23",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Canis:2013:LOS,
  author          = "Andrew Canis and Jongsok Choi and Mark Aldham and
                     Victor Zhang and Ahmed Kammoona and Tomasz Czajkowski
                     and Stephen D. Brown and Jason H. Anderson",
  title           = "{LegUp}: an open-source high-level synthesis tool for
                     {FPGA}-based processor\slash accelerator systems",
  journal         = j-TECS,
  volume          = "13",
  number          = "2",
  pages           = "24:1--24:??",
  month           = sep,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2514740",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "It is generally accepted that a custom hardware
                     implementation of a set of computations will provide
                     superior speed and energy efficiency relative to a
                     software implementation. However, the cost and
                     difficulty of hardware design is often prohibitive, and
                     consequently, a software approach is used for most
                     applications. In this article, we introduce a new
                     high-level synthesis tool called LegUp that allows
                     software techniques to be used for hardware design.
                     LegUp accepts a standard C program as input and
                     automatically compiles the program to a hybrid
                     architecture containing an FPGA-based MIPS soft
                     processor and custom hardware accelerators that
                     communicate through a standard bus interface. In the
                     hybrid processor/accelerator architecture, program
                     segments that are unsuitable for hardware
                     implementation can execute in software on the
                     processor. LegUp can synthesize most of the C language
                     to hardware, including fixed-sized multidimensional
                     arrays, structs, global variables, and pointer
                     arithmetic. Results show that the tool produces
                     hardware solutions of comparable quality to a
                     commercial high-level synthesis tool. We also give
                     results demonstrating the ability of the tool to
                     explore the hardware/software codesign space by varying
                     the amount of a program that runs in software versus
                     hardware. LegUp, along with a set of benchmark C
                     programs, is open source and freely downloadable,
                     providing a powerful platform that can be leveraged for
                     new research on a wide range of high-level synthesis
                     topics.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "24",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Papakonstantinou:2013:ECC,
  author          = "Alexandros Papakonstantinou and Karthik Gururaj and
                     John A. Stratton and Deming Chen and Jason Cong and
                     Wen-Mei W. Hwu",
  title           = "Efficient compilation of {CUDA} kernels for
                     high-performance computing on {FPGAs}",
  journal         = j-TECS,
  volume          = "13",
  number          = "2",
  pages           = "25:1--25:??",
  month           = sep,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2514641.2514652",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The rise of multicore architectures across all
                     computing domains has opened the door to heterogeneous
                     multiprocessors, where processors of different compute
                     characteristics can be combined to effectively boost
                     the performance per watt of different application
                     kernels. GPUs, in particular, are becoming very popular
                     for speeding up compute-intensive kernels of
                     scientific, imaging, and simulation applications. New
                     programming models that facilitate parallel processing
                     on heterogeneous systems containing GPUs are spreading
                     rapidly in the computing community. By leveraging these
                     investments, the developers of other accelerators have
                     an opportunity to significantly reduce the programming
                     effort by supporting those accelerator models already
                     gaining popularity. In this work, we adapt one such
                     language, the CUDA programming model, into a new FPGA
                     design flow called FCUDA, which efficiently maps the
                     coarse- and fine-grained parallelism exposed in CUDA
                     onto the reconfigurable fabric. Our CUDA-to-FPGA flow
                     employs AutoPilot, an advanced high-level synthesis
                     tool (available from Xilinx) which enables
                     high-abstraction FPGA programming. FCUDA is based on a
                     source-to-source compilation that transforms the SIMT
                     (Single Instruction, Multiple Thread) CUDA code into
                     task-level parallel C code for AutoPilot. We describe
                     the details of our CUDA-to-FPGA flow and demonstrate
                     the highly competitive performance of the resulting
                     customized FPGA multicore accelerators. To the best of
                     our knowledge, this is the first CUDA-to-FPGA flow to
                     demonstrate the applicability and potential advantage
                     of using the CUDA programming model for
                     high-performance computing in FPGAs.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "25",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Editors:2013:ISS,
  author          = "{Editors}",
  title           = "Introduction to the special section on
                     {ESTIMedia'10}",
  journal         = j-TECS,
  volume          = "13",
  number          = "1s",
  pages           = "26:1--26:??",
  month           = nov,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2536747.2536748",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Dec 9 11:30:05 MST 2013",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "26",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Jia:2013:SLI,
  author          = "Zai Jian Jia and Tom{\'a}s Bautista and Antonio
                     N{\'u}{\~n}ez and Andy D. Pimentel and Mark Thompson",
  title           = "A system-level infrastructure for multidimensional
                     {MP-SoC} design space co-exploration",
  journal         = j-TECS,
  volume          = "13",
  number          = "1s",
  pages           = "27:1--27:??",
  month           = nov,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2536747.2536749",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Dec 9 11:30:05 MST 2013",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "In this article, we present a flexible and extensible
                     system-level MP-SoC design space exploration (DSE)
                     infrastructure, called NASA. This highly modular
                     framework uses well-defined interfaces to easily
                     integrate different system-level simulation tools as
                     well as different combinations of search strategies in
                     a simple plug-and-play fashion. Moreover, NASA deploys
                     a so-called dimension-oriented DSE approach, allowing
                     designers to configure the appropriate number of,
                     well-tuned and possibly different, search algorithms to
                     simultaneously co-explore the various design space
                     dimensions. As a result, NASA provides a flexible and
                     re-usable framework for the systematic exploration of
                     the multidimensional MP-SoC design space, starting from
                     a set of relatively simple user specifications. To
                     demonstrate the capabilities of the NASA framework and
                     to illustrate its distinct aspects, we also present
                     several DSE experiments in which, for example, we
                     compare NASA configurations using a single search
                     algorithm for all design space dimensions to
                     configurations using a separate search algorithm per
                     dimension. These proof-of-concept experiments indicate
                     that the latter multidimensional co-exploration can
                     find better design points and evaluates a higher
                     diversity of design alternatives as compared to the
                     more traditional approach of using a single search
                     algorithm for all dimensions.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "27",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Nadezhkin:2013:AGP,
  author          = "Dmitry Nadezhkin and Hristo Nikolov and Todor
                     Stefanov",
  title           = "Automated generation of polyhedral process networks
                     from affine nested-loop programs with dynamic loop
                     bounds",
  journal         = j-TECS,
  volume          = "13",
  number          = "1s",
  pages           = "28:1--28:??",
  month           = nov,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2536747.2536750",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Dec 9 11:30:05 MST 2013",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The Process Networks (PNs) is a suitable parallel
                     model of computation (MoC) used to specify embedded
                     streaming applications in a parallel form facilitating
                     the efficient mapping onto embedded parallel execution
                     platforms. Unfortunately, specifying an application
                     using a parallel MoC is a very difficult and highly
                     error-prone task. To overcome the associated
                     difficulties, we have developed the pn compiler, which
                     derives specific Polyhedral Process Networks (PPN)
                     parallel specifications from sequential static affine
                     nested loop programs (SANLPs). However, there are many
                     applications, for example, multimedia applications
                     (MPEG coders/decoders, smart cameras, etc.) that have
                     adaptive and dynamic behavior which cannot be expressed
                     as SANLPs. Therefore, in order to handle dynamic
                     multimedia applications, in this article we address the
                     important question whether we can relax some of the
                     restrictions of the SANLPs while keeping the ability to
                     perform compile-time analysis and to derive PPNs.
                     Achieving this would significantly extend the range of
                     applications that can be parallelized in an automated
                     way. The main contribution of this article is a first
                     approach for automated translation of affine nested
                     loop programs with dynamic loop bounds into
                     input-output equivalent Polyhedral Process Networks. In
                     addition, we present a method for analyzing the
                     execution overhead introduced in the PPNs derived from
                     programs with dynamic loop bounds. The presented
                     automated translation approach has been evaluated by
                     deriving a PPN parallel specification from a real-life
                     application called Low Speed Obstacle Detection (LSOD)
                     used in the smart cameras domain. By executing the
                     derived PPN, we have obtained results which indicate
                     that the approach we present in this article
                     facilitates efficient parallel implementations of
                     sequential nested loop programs with dynamic loop
                     bounds. That is, our approach reveals the possible
                     parallelism available in such applications, which
                     allows for the utilization of multiple cores in an
                     efficient way.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "28",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2013:AMC,
  author          = "Yulei Wu and Geyong Min and Dakai Zhu and Laurence T.
                     Yang",
  title           = "An analytical model for on-chip interconnects in
                     multimedia embedded systems",
  journal         = j-TECS,
  volume          = "13",
  number          = "1s",
  pages           = "29:1--29:??",
  month           = nov,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2536747.2536751",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Dec 9 11:30:05 MST 2013",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The traffic pattern has significant impact on the
                     performance of network-on-chip. Many recent studies
                     have shown that multimedia applications can be
                     supported in on-chip interconnects. Driven by the
                     motivation of evaluating on-chip interconnects in
                     multimedia embedded systems, a new analytical model is
                     proposed to investigate the performance of the fat-tree
                     based on-chip interconnection network under bursty
                     multimedia traffic and nonuniform message destinations.
                     Extensive simulation experiments are conducted to
                     validate the accuracy of the model, which is then
                     adopted as a cost-efficient tool to investigate the
                     effects of bursty multimedia traffic with nonuniform
                     destinations on the network performance.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "29",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Che:2013:SSD,
  author          = "Weijia Che and Karam S. Chatha",
  title           = "Scheduling of synchronous data flow models onto
                     scratchpad memory-based embedded processors",
  journal         = j-TECS,
  volume          = "13",
  number          = "1s",
  pages           = "30:1--30:??",
  month           = nov,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2536747.2536752",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Dec 9 11:30:05 MST 2013",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "In this article, we propose a heuristic algorithm for
                     scheduling synchronous data flow (SDF) models on
                     scratch pad memory (SPM) enhanced processors with the
                     objective of minimizing its steady-state execution
                     time. The task involves partitioning the limited
                     on-chip SPM for actor code and data buffer, and
                     executing actors in such a manner that the physical SPM
                     is time shared with different actors and buffers
                     (formally defined as code overlay and data overlay,
                     respectively). In our setup, a traditional minimum
                     buffer schedule could result in very high code overlay
                     overhead and therefore may not be optimal. To reduce
                     the number of direct memory access (DMA) transfers,
                     actors need to be grouped into segments. Prefetching of
                     code and data overlay that overlaps DMA transfers with
                     actor executions also needs to be exploited. The
                     efficiency of our heuristic was evaluated by
                     compiling ten stream applications onto one synergistic
                     processing engine (SPE) of an IBM Cell Broadband
                     Engine. We compare the performance results of our
                     heuristic approach with a minimum buffer scheduling
                     approach and a 3-stage ILP approach, and show that our
                     heuristic is able to generate high quality solutions
                     with fast algorithm run time.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "30",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Schmoll:2013:IFR,
  author          = "Florian Schmoll and Andreas Heinig and Peter Marwedel
                     and Michael Engel",
  title           = "Improving the fault resilience of an {H.264} decoder
                     using static analysis methods",
  journal         = j-TECS,
  volume          = "13",
  number          = "1s",
  pages           = "31:1--31:??",
  month           = nov,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2536747.2536753",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Dec 9 11:30:05 MST 2013",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Fault tolerance rapidly evolves into one of the most
                     significant design objectives for embedded systems due
                     to reduced semiconductor structures and supply
                     voltages. However, resource-constrained systems cannot
                     afford traditional error correction for overhead and
                     cost reasons. New methods are required to sustain
                     acceptable service quality in case of errors while
                     avoiding crashes. We present a flexible fault-tolerance
                     approach that is able to select correction actions
                     depending on error semantics using application
                     annotations and static analysis approaches. We verify
                     the validity of our approach by analyzing the
                     vulnerability and improving the reliability of an H.264
                     decoder using flexible error handling.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "31",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Parmer:2013:PCC,
  author          = "Gabriel Parmer and Richard West",
  title           = "Predictable and configurable component-based
                     scheduling in the {Composite OS}",
  journal         = j-TECS,
  volume          = "13",
  number          = "1s",
  pages           = "32:1--32:??",
  month           = nov,
  year            = "2013",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2536747.2536754",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Dec 9 11:30:05 MST 2013",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "This article presents the design of user-level
                     scheduling hierarchies in the Composite component-based
                     system. The motivation for this is centered around the
                     design of a system that is both dependable and
                     predictable, and which is configurable to the needs of
                     specific applications. Untrusted application developers
                     can safely develop services and policies, that are
                     isolated in protection domains outside the kernel. To
                     ensure predictability, Composite enforces timing
                     control over user-space services. Moreover, it must
                     provide a means by which asynchronous events, such as
                     interrupts, are handled in a timely manner without
                     jeopardizing the system. Towards this end, we describe
                     the features of Composite that allow user-defined
                     scheduling policies to be composed for the purposes of
                     combined interrupt and task management. A significant
                     challenge arises from the need to synchronize access to
                     shared data structures (e.g., scheduling queues),
                     without allowing untrusted code to disable interrupts.
                     Additionally, efficient upcall mechanisms are needed to
                     deliver asynchronous event notifications in accordance
                     with policy-specific priorities, without undue recourse
                     to schedulers. We show how these issues are addressed
                     in Composite, by comparing several hierarchies of
                     scheduling policies, to manage both tasks and the
                     interrupts on which they depend. Studies show how it is
                     possible to implement guaranteed differentiated
                     services as part of the handling of I/O requests from a
                     network device while diminishing livelock.
                     Microbenchmarks indicate that the costs of implementing
                     and invoking user-level schedulers in Composite are on
                     par with, or less than, those in other systems, with
                     thread switches more than twice as fast as in Linux.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "32",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@article{Zhou:2013:ARD,
  author = {Zhou, Bo and Hu, Xiaobo Sharon and Chen, Danny Z. and
    Yu, Cedric X.},
  title = {Accelerating radiation dose calculation: a
    multi-{FPGA} solution},
  journal = j-TECS,
  volume = {13},
  number = {1s},
  pages = {33:1--33:??},
  month = nov,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2536747.2536755},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Dec 9 11:30:05 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Remarkable progress has been made in the past few
    decades in various aspects of radiation therapy (RT).
    However, some of these promising technologies, such as
    image-guided online replanning and arc therapy, rely
    heavily on the availability of fast dose calculation.
    In this article, based on a popular dose calculation
    algorithm, the Collapsed-Cone Convolution/Superposition
    (CCCS) algorithm, we present a multi-FPGA accelerator
    to speed up radiation dose calculation. Our
    performance-driven design strategy yields a fully
    pipelined architecture, which includes a
    resource-economic raytracing engine and
    high-performance energy deposition pipeline. An
    evaluation based on a set of clinical treatment
    planning cases confirms that our FPGA design almost
    fully utilizes the available external memory bandwidth
    and achieves close to the best possible performance for
    the CCCS algorithm while using less resource. Compared
    with an existing FPGA design which aimed to accelerate
    the identical algorithm, the proposed design achieved
    1.9X speedup by providing better memory bandwidth
    utilization (81.7\% v.s. 43\% of the available external
    memory bandwidth), higher working frequency (90MHz v.s.
    70MHz) and less logic resource usage (25K v.s. 55K
    logic cells). Furthermore, it obtains a speedup of 20X
    over a commercial multithreaded software on a quad-core
    system and 15X performance improvement over closely
    related results. In terms of accuracy, the measured
    less than 1\% statistical fluctuation indicates that
    our solution is practical in real medical scenarios.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {33},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@Article{Furtado:2013:CON,
author = "Pedro Furtado and Jos{\'e} Cec{\'\i}lio",
title = "Configuration and operation of networked control
systems over heterogeneous {WSANs}",
journal = j-TECS,
volume = "13",
number = "1s",
pages = "34:1--34:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2536747.2536756",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 9 11:30:05 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "There have been both research and commercial advances
on applying Wireless Sensor and Actuator Networks (WSN)
in industrial premises. These have cost advantages
related to avoiding some cabled deployments. A possible
architecture involves a Networked Control System (NCS)
with many small WSN subnetworks, cabled nodes and
computer servers (e.g., servers, control stations). In
those systems individual sensor nodes can be
programmed, as opposed to cabled analog systems. We
investigate approaches for networked-wide
configuration, where all nodes---cabled or WSN
sensors---can be configured with simplicity from a single
interface, instead of hand-coding or complex
configurations of individual nodes. We propose an
architecture and approach for configuration and
operation. Previous related proposals on middleware
involving WSNs suffer from two major limitations: they
either program within an individual WSN or configure
operation outside WSNs, wrapping data coming from WSN.
They do not allow configuring WSN and non-WSN nodes for
operation from a single interface. We discuss the
architecture and propose the NCSWSN configuration and
operation approach. We are applying this system in an
industrial testbed, therefore we test the approach and
also show user interfaces and results from the
deployment.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Sanz:2013:SLM,
  author = {Sanz, Concepci{\'o}n and G{\'o}mez, Jos{\'e} Ignacio and
    Tenllado, Christian and Prieto, Manuel and
    Catthoor, Francky},
  title = {System-level memory management based on statistical
    variability compensation for frame-based applications},
  journal = j-TECS,
  volume = {13},
  number = {1s},
  pages = {35:1--35:??},
  month = nov,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2536747.2536757},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Dec 9 11:30:05 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Process variability and dynamic domains increase the
    uncertainty of embedded systems and force designers to
    apply pessimistic designs, which become unnecessarily
    conservative and have a tremendous impact on both
    performance and energy consumption. In this context,
    developing uncertainty-aware design methodologies that
    take both variation at platform and at application
    level into account becomes a must. These methodologies
    should mitigate the effects derived from uncertainty,
    avoiding worst-case assumptions. In this article we
    propose a comprehensive methodology to tackle two forms
    of uncertainty: (1) process variation on the memory
    system, (2) application dynamism. A statistical model
    has been developed to deal with variability derived
    from fabrication process, whereas system scenarios are
    selected to cope with dynamic domains. Both sources of
    uncertainty are firstly tackled in combination at
    design time, to be refined later, at setup. As a
    result, at run time the platform can be successfully
    adapted to the current application behaviour as well as
    the current variations. Our simulations show that this
    methodology provides significant energy savings while
    still meeting strict timing constraints.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {35},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@article{Mohaqeqi:2013:ASR,
  author = {Mohaqeqi, Morteza and Kargahi, Mehdi and
    Dehghan, Maryam},
  title = {Adaptive scheduling of real-time systems cosupplied by
    renewable and nonrenewable energy sources},
  journal = j-TECS,
  volume = {13},
  number = {1s},
  pages = {36:1--36:??},
  month = nov,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2536747.2536758},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Dec 9 11:30:05 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Energy management is an important issue in today's
    real-time systems due to the high costs of energy
    supplying. Using renewable, like wave, wind, and solar
    energy sources seem promising methods to address this
    issue. However, because of the existing contrast
    between the critical nature of hard real-time systems
    and the unpredictable nature of renewable energies,
    some supplementary energy source like electricity grid
    or battery is needed. In this paper, we consider hard
    real-time systems with two renewable and nonrenewable
    energy sources. In order to reduce the costs, we
    present two dynamic voltage scaling controllers to
    minimize the energy attained from the latter source. In
    order to handle variations of the environmental energy
    and workload, the model predictive control approach is
    employed. One nonlinear approach beside one fast linear
    piecewise affine explicit controller are proposed. The
    efficacies of the proposed approaches have been
    investigated through extensive simulations. Comparisons
    to an ideal clairvoyant controller as a baseline show
    that, in the studied scenarios, the proposed
    controllers guarantee at least 78\% of the baseline
    performance.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {36},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@article{Lo:2013:AGH,
  author = {Lo, Chen-Kang and Li, Mao-Lin and Chen, Li-Chun and
    Lu, Yi-Shan and Tsay, Ren-Song and Huang, Hsu-Yao and
    Yeh, Jen-Chieh},
  title = {Automatic generation of high-speed accurate {TLM}
    models for out-of-order pipelined bus},
  journal = j-TECS,
  volume = {13},
  number = {1s},
  pages = {37:1--37:??},
  month = nov,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2536747.2536759},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Dec 9 11:30:05 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Although pipelined/out-of-order (PL/OO) execution
    features are commonly supported by the state-of-the-art
    bus designs, no existing manual
    Transaction-Level-Modeling (TLM) approaches can
    effectively construct fast and accurate simulation
    models for PL/OO buses. Mainly, the inherent high
    design complexity of concurrent PL/OO behaviors makes
    the manual approaches tedious and error-prone. To
    tackle the complicated modeling task, this article
    presents an automatic approach that performs systematic
    abstraction and generation of fast-and-accurate
    simulation models. The experimental results show that
    our approach reduces 21 times modeling efforts, while
    our generated models perform simulation an order of
    magnitude faster than Cycle-Accurate models with the
    same PL/OO transaction execution cycle counts
    preserved.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {37},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@article{Lee:2013:SBR,
  author = {Lee, Jongeun and Shrivastava, Aviral},
  title = {Software-based register file vulnerability reduction
    for embedded processors},
  journal = j-TECS,
  volume = {13},
  number = {1s},
  pages = {38:1--38:??},
  month = nov,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2536747.2536760},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Dec 9 11:30:05 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Register File (RF) is extremely vulnerable to soft
    errors, and traditional redundancy based schemes to
    protect the RF are prohibitive not only because RF is
    often in the timing critical path of the processor, but
    also since it is one of the hottest blocks on the chip.
    Software approaches would be ideal in this case, but
    previous approaches based on instruction scheduling are
    only moderately effective due to local scope. In this
    article we present a compiler approach, based on
    interprocedural program analysis, to reduce the
    vulnerability of registers by temporarily writing live
    variables to protected memory. We formulate the problem
    as an integer linear programming problem and also
    present a very efficient heuristic algorithm. Further
    we present an iterative optimization method based on
    Kernighan--Lin's graph partitioning algorithm. Our
    experiments demonstrate that our proposed techniques
    can reduce the vulnerability of a RF by 33 to 37\% on
    average and up to 66\%, with a small 2\% increase in
    runtime. In addition, our overhead reduction
    optimization can effectively reduce the code size
    overhead, by more than 40\% on average, to a mere 5 to
    6\%, compared to highly optimized binaries.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {38},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@article{Singh:2013:MCN,
  author = {Singh, Anshul and Basu, Arindam and Ling, Keck-Voon and
    {Mooney III}, Vincent J.},
  title = {Models for characterizing noise based {PCMOS}
    circuits},
  journal = j-TECS,
  volume = {13},
  number = {1s},
  pages = {39:1--39:??},
  month = nov,
  year = {2013},
  coden = {????},
  doi = {https://doi.org/10.1145/2536747.2536761},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Dec 9 11:30:05 MST 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Quick and accurate error-rate prediction of
    Probabilistic CMOS (PCMOS) circuits is crucial for
    their systematic design and performance evaluation.
    While still in the early stage of research, PCMOS has
    shown potential to drastically reduce energy
    consumption at a cost of increased errors. Recently, a
    methodology has been proposed which could predict the
    error rates of cascade structures of blocks in PCMOS.
    This methodology requires error rates of unique blocks
    to predict the error rates of multiblock cascade
    structures composed of these unique blocks. In this
    article we present a new model for characterization of
    probabilistic circuits/blocks and present a procedure
    to find and characterize unique circuits/blocks. Unlike
    prior approaches, our new model distinguishes distinct
    filtering effects per output, thereby improving
    prediction accuracy by an average of 95\% over the
    prior art by Palem and coauthors. Furthermore, we show
    two models where our new model with three stages is
    18\% more accurate, on average, than our simpler
    two-stage model. We apply our proposed models to Ripple
    Carry Adders and Wallace Tree Multipliers and show that
    using our models, the methodology of cascade structures
    can predict error rates of PCMOS circuits with
    reasonable accuracy (within 9\%) in PCMOS for uniform
    voltages as well as multiple voltages. Finally, our
    approach takes seconds of simulation time whereas using
    HSPICE would take days of simulation time.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {39},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@Article{Anagnostopoulos:2013:PAD,
author = "Iraklis Anagnostopoulos and Jean-Michel Chabloz and
Ioannis Koutras and Alexandros Bartzas and Ahmed Hemani
and Dimitrios Soudris",
title = "Power-aware dynamic memory management on many-core
platforms utilizing {DVFS}",
journal = j-TECS,
volume = "13",
number = "1s",
pages = "40:1--40:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2536747.2536762",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Dec 9 11:30:05 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Today multicore platforms are already prevalent
solutions for modern embedded systems. In the future,
embedded platforms will have an even more increased
processor core count, composing many-core platforms. In
addition, applications are becoming more complex and
dynamic and try to efficiently utilize the amount of
available resources on the embedded platforms.
Efficient memory utilization is a key challenge for
application developers, especially since memory is a
scarce resource and often becomes the system's
bottleneck. To cope with this dynamism and achieve
better memory footprint utilization (low memory
fragmentation) application developers resort to the
usage of dynamic memory (heap) management techniques,
by allocating and deallocating data at runtime.
Moreover, overall power consumption is another key
challenge that needs to be taken into consideration.
Towards this, designers employ the usage of Dynamic
Voltage and Frequency Scaling (DVFS) mechanisms,
adapting to the application's computational demands at
runtime. In this article, we propose the combination of
dynamic memory management techniques with DVFS ones.
This is performed by integrating, within the memory
manager, runtime monitoring mechanisms that steer the
DVFS mechanisms to adjust clock frequency and voltage
supply based on heap performance. The proposed approach
has been evaluated on a distributed shared-memory
many-core platform composed of multiple LEON3
processors interconnected by a Network-on-Chip
infrastructure, supporting DVFS. Experimental results
show that by using the proposed method for monitoring
and applying DVFS mechanisms the power consumption
concerning dynamic memory management was reduced by
approximately 37\%. In addition we present the
trade-offs of the proposed approach. Last, by combining
the developed method with heap fragmentation-aware
dynamic memory managers, we achieve low heap
fragmentation values combined with low power
consumption.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Editors:2014:MMA,
author = "{Editors}",
title = "Monitoring massive appliances by a minimal number of
smart meters",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "56:1--56:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544376",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a framework for deploying a
minimal number of smart meters to accurately track the
ON/OFF states of a massive number of electrical
appliances which exploits the sparseness feature of
simultaneous ON/OFF switching events of the massive
appliances. A theoretical bound on the least number of
required smart meters is studied by an entropy-based
approach, which qualifies the impact of meter
deployment strategies to the state tracking accuracy.
It motivates a meter deployment optimization algorithm
(MDOP) to minimize the number of meters while
satisfying given requirements to state tracking
accuracy. To accurately decode the real-time ON/OFF
states of appliances by the readings of meters, a fast
state decoding (FSD) algorithm based on the hidden
Markov model (HMM) is presented to track the state
sequence of each appliance for better accuracy.
Although traditional HMM needs $ O(t 2^{2 N}) $ time
complexity to conduct online sequence decoding, FSD
improves the complexity to $ O (t n^{U + 1}) $, where
$ n < N $ and $U$ is an upper bound of the simultaneous
switching events. Both MDOP and FSD are verified
extensively using simulations and real PowerNet data.
The results show that the meter deployment cost can be
saved by more than 80\% while still getting over 90\%
state tracking accuracy.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "56",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
xxnote = "Check author: ``Editors'' looks like a
publisher-metadata placeholder for this research
article; confirm the real author list against the DOI
landing page before citing??",
}
@article{Wu:2014:EDF,
  author = {Wu, Chenye and Shi, Yiyu and Kar, Soummya},
  title = {Exploring demand flexibility in heterogeneous
    aggregators: an {LMP}-based pricing scheme},
  journal = j-TECS,
  volume = {13},
  number = {2s},
  pages = {57:1--57:??},
  month = jan,
  year = {2014},
  coden = {????},
  doi = {https://doi.org/10.1145/2544375.2544377},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Jan 28 17:34:43 MST 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {With the proposed penetration of electric vehicles and
    advanced metering technology, the demand side is
    foreseen to play a major role in flexible energy
    consumption scheduling. On the other hand, the past
    several years have witnessed utility companies' growing
    interests to integrate more renewable energy resources.
    These renewable resources, for example, wind or solar,
    due to their intermittent nature, brought great
    uncertainty to the power grid system. In this article,
    we propose a mechanism that attempts to mitigate the
    grid operational uncertainty induced by renewable
    energies by properly exploiting demand flexibility with
    the help of advanced smart-metering technology. To
    address the challenge, we develop a novel locational
    marginal price (LMP)-based pricing scheme that involves
    active demand-side participation by casting the network
    objective as a two-stage Stackelberg game between the
    local grid operator and several aggregators. In
    contrast to the conventional notion that generation
    follows load, our game formulation provides more
    flexibility for the operators and tries to provide
    adequate incentives for the loads to follow the
    (stochastic renewable) generation. We use the solution
    concept of subgame perfect equilibrium to analyze the
    resulting game. Subsequently, we discuss the optimal
    real-time conventional capacity planning for the local
    grid operator to achieve the minimal mismatch between
    supply and demand with the wind power integration.
    Finally, we assess our proposed scheme with field data.
    The simulation results show that our proposed scheme
    works reasonably well in the long term, even with
    simple heuristics.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {57},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@article{Chang:2014:ISS,
  author = {Chang, Naehyuck and Chen, Jian-Jia},
  title = {Introduction to the special section on
    {ESTIMedia'11}},
  journal = j-TECS,
  volume = {13},
  number = {2s},
  pages = {58:1--58:??},
  month = jan,
  year = {2014},
  coden = {????},
  doi = {https://doi.org/10.1145/2544375.2544378},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Jan 28 17:34:43 MST 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {58},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@Article{Su:2014:RVP,
author = "Tzu-Hsiang Su and Hsiang-Jen Tsai and Keng-Hao Yang
and Po-Chun Chang and Tien-Fu Chen and Yi-Ting Zhao",
title = "Reconfigurable vertical profiling framework for the
{Android} runtime system",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "59:1--59:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544379",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Dalvik virtual machine in the Android system creates a
profiling barrier between VM-space applications and
Linux user-space libraries. It is difficult for
existing profiling tools on the Android system to
definitively identify whether a bottleneck occurred in
the application level, the Linux user-space level, or
the Linux kernel level. Information barriers exist
between VM-space applications and Linux native analysis
tools due to runtime virtual machines' dynamic memory
allocation mechanism. Furthermore, traditional vertical
profiling tools targeted for Java virtual machines
cannot be simply applied on the Dalvik virtual machine
due to its unique design. The proposed
Reconfigurable Vertical Profiling Framework bridges the
information gap and streamlines the hardware-software
co-design process for the Android runtime system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "59",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Song:2014:POA,
  author = {Song, Wook and Kim, Yeseong and Kim, Hakbong and
    Lim, Jehun and Kim, Jihong},
  title = {Personalized optimization for {Android} smartphones},
  journal = j-TECS,
  volume = {13},
  number = {2s},
  pages = {60:1--60:??},
  month = jan,
  year = {2014},
  coden = {????},
  doi = {https://doi.org/10.1145/2544375.2544380},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Jan 28 17:34:43 MST 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {As a highly personalized computing device, smartphones
    present a unique new opportunity for system
    optimization. For example, it is widely observed that a
    smartphone user exhibits very regular application usage
    patterns (although different users are quite different
    in their usage patterns). User-specific high-level app
    usage information, when properly managed, can provide
    valuable hints for optimizing various system design
    requirements. In this article, we describe the design
    and implementation of a personalized optimization
    framework for the Android platform that takes advantage
    of user's application usage patterns in optimizing the
    performance of the Android platform. Our optimization
    framework consists of two main components, the
    application usage modeling module and the usage
    model-based optimization module. We have developed two
    novel application usage models that correctly capture
    typical smartphone user's application usage patterns.
    Based on the application usage models, we have
    implemented an app-launching experience optimization
    technique which tries to minimize user-perceived
    delays, extra energy consumption, and state loss when a
    user launches apps. Our experimental results on the
    Nexus S Android reference phones show that our proposed
    optimization technique can avoid unnecessary
    application restarts by up to 78.4\% over the default
    LRU-based policy of the Android platform.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {60},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@Article{Mirzoyan:2014:PVA,
author = "Davit Mirzoyan and Benny Akesson and Kees Goossens",
title = "Process-variation-aware mapping of best-effort and
real-time streaming applications to {MPSoCs}",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "61:1--61:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2490819",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "As technology scales, the impact of process variation
on the maximum supported frequency (FMAX) of individual
cores in a multiprocessor system-on-chip (MPSoC)
becomes more pronounced. Task allocation without
variation-aware performance analysis can greatly
compromise performance and lead to a significant loss
in yield, defined as the percentage of manufactured
chips satisfying the application timing requirement. We
propose variation-aware task allocation for best-effort
and real-time streaming applications modeled as task
graphs. Our solutions are primarily based on the
throughput requirement, which is the most important
timing requirement in many real-time streaming
applications. The four main contributions of this work
are (1) distinguishing best-effort, firm real-time, and
soft real-time application classes, which require
different optimization criteria, (2) using dataflow
graphs, which are well suited for modeling and analysis
of streaming applications, we explicitly model task
execution both in terms of clock cycles (which is
independent of variation) and seconds (which does
depend on the variation of the resource), which we
connect by an explicit binding, (3) we present two
optimization approaches, which give different
improvement results at different costs, (4) we present
both exhaustive and heuristic algorithms that implement
the optimization approaches. Our variation-aware
mapping algorithms are tested on models of seven real
applications and are compared to mapping methods that
are unaware of hardware variation. Our results
demonstrate (1) improvements in the average performance
(3\% on average) for best-effort applications, and (2)
for firm real-time and soft real-time applications,
yield improvements of up to 27\% with an average of
15\%, showing the effectiveness of our approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "61",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Jung:2014:HCO,
  author = {Jung, Dong-Heon and Moon, Soo-Mook and Oh, Hyeong-Seok},
  title = {Hybrid compilation and optimization for {Java}-based
    digital {TV} platforms},
  journal = j-TECS,
  volume = {13},
  number = {2s},
  pages = {62:1--62:??},
  month = jan,
  year = {2014},
  coden = {????},
  doi = {https://doi.org/10.1145/2506257},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Jan 28 17:34:43 MST 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/java2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The Java-based software platform for interactive
    digital TV (DTV) is composed of the system/middleware
    class statically installed on the DTV set-top box and
    the xlet applications dynamically downloaded from the
    TV stations. The xlet application includes Java classes
    and image/text files. The xlets are executed only when
    the TV viewer initiates an interaction, even if the
    xlets have been completely downloaded. To achieve high
    performance on this dual-component, user-initiated
    system, existing just-in-time (JIT) compilation and
    optimization is not enough; instead, ahead-of-time and
    idle-time compilation and optimization are also needed,
    requiring a hybrid compilation and optimization
    environment. We constructed such a hybrid environment
    for a commercial DTV software platform and evaluated it
    using real, on-air xlet applications. Our experimental
    results show that the proposed hybrid environment can
    improve the DTV Java performance by more than three
    times, compared to the JIT-only environment, with
    little change to other DTV behavior.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {62},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@article{Chang:2014:RAC,
  author = {Chang, Li-Pin and Wen, Chen-Yi},
  title = {Reducing asynchrony in channel garbage-collection for
    improving internal parallelism of multichannel
    solid-state disks},
  journal = j-TECS,
  volume = {13},
  number = {2s},
  pages = {63:1--63:??},
  month = jan,
  year = {2014},
  coden = {????},
  doi = {https://doi.org/10.1145/2544375.2544383},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Tue Jan 28 17:34:43 MST 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Solid-state disks use multichannel architectures to
    boost their data transfer rates. Because realistic disk
    workloads have numerous small write requests, modern
    flash-storage devices adopt a write buffer and a set of
    independent channels for better parallelism in serving
    small write requests. When a channel is undergoing
    garbage collection, it stops responding to inbound
    write traffic and accumulates page data in the write
    buffer. This results in contention for buffer space and
    creates idle periods in channels. This study presents a
    channel-management strategy, called garbage-collection
    advancing, which allows early start of garbage
    collection in channels for increasing the overlap among
    channel activities of garbage collection and restoring
    the balance of buffer-space usage among channels. This
    study further introduces cycle filling, which is a
    version of garbage-collection advancing tailored for
    the operation model of flash planes. Experimental
    results show that the proposed methods greatly
    outperformed existing designs of multichannel systems
    in terms of response and throughput. We also
    successfully implemented the proposed methods in a real
    solid-state disk and proved their feasibility in real
    hardware.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {63},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}
@Article{Li:2014:MRT,
author = "Zheng Li and Frank Lockom and Shangping Ren",
title = "Maintaining real-time application timing similarity
for defect-tolerant {NoC}-based many-core systems",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "64:1--64:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544384",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many-core Network-on-Chip (NoC) processors are
emerging in broad application areas, including those
with timing requirements, such as real-time and
multimedia applications. Typically, these processors
employ core-level backup to improve yield. However,
when defective cores are replaced by backup ones, the
NoC topology changes. Consequently, a fine-tuned
application based on timing parameters given by one
topology may not meet the expected timing behavior
under the new one. We first develop a metric to measure
timing similarity of an application on different NoC
topologies and then propose mixed binary quadratic
programming and greedy algorithms to reconfigure a
defect-tolerant many-core NoC.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "64",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ahmed:2014:TSA,
author = "Masud Ahmed and Nathan Fisher",
title = "Tractable schedulability analysis and resource
allocation for real-time multimodal systems",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "65:1--65:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544385",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Real-time multimedia subsystems often require support
for switching between different resource and
application execution modes. To ensure that timing
constraints are not violated during or after a
subsystem mode change, real-time schedulability
analysis is required. However, existing time-efficient
multimode schedulability analysis techniques for
application-only mode changes are not appropriate for
subsystems that require changes in the resource
execution behavior (e.g., processors with dynamic power
modes). Furthermore, all existing multimode
schedulability analysis that handles both resource and
application mode changes is highly exponential and not
scalable for subsystems with a moderate or large number
of modes. As a result, the notion of resource
optimality is still unaddressed for real-time
multimodal systems. In this report, we first address
the lack of tractable schedulability analysis for such
subsystems by proposing a model for characterizing
multiple resource and application modes and by deriving
a sufficient schedulability test that has
pseudo-polynomial time complexity. Finally, we propose
an algorithm which leverages this pseudo-polynomial
schedulability analysis to optimize the resource usages
(e.g., to minimize peak-power load) of a multimodal
real-time system. Simulation results show that our
proposed algorithms for schedulability analysis and
resource allocation, when compared with
previously-proposed approaches, require significantly
less time and are just as precise.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "65",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Balani:2014:DPF,
author = "Rahul Balani and Lucas F. Wanner and Mani B.
Srivastava",
title = "Distributed programming framework for fast iterative
optimization in networked cyber-physical systems",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "66:1--66:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544386",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Large-scale coordination and control problems in
cyber-physical systems are often expressed within the
networked optimization model. While significant
advances have taken place in optimization techniques,
their widespread adoption in practical implementations
has been impeded by the complexity of internode
coordination and lack of programming support for the
same. Currently, application developers build their own
elaborate coordination mechanisms for synchronized
execution and coherent access to shared resources via
distributed and concurrent controller processes.
However, they typically tend to be error prone and
inefficient due to tight constraints on application
development time and cost. This is unacceptable in many
CPS applications, as it can result in expensive and
often irreversible side-effects in the environment due
to inaccurate or delayed reaction of the control
system. This article explores the design of a
distributed shared memory (DSM) architecture that
abstracts the details of internode coordination. It
simplifies application design by transparently managing
routing, messaging, and discovery of nodes for coherent
access to shared resources. Our key contribution is the
design of provably correct locality-sensitive
synchronization mechanisms that exploit the spatial
locality inherent in actuation to drive faster and
scalable application execution through opportunistic
data parallel operation. As a result, applications
encoded in the proposed Hotline Application Programming
Framework are error free, and in many scenarios,
exhibit faster reactions to environmental events over
conventional implementations. Relative to our prior
work, this article extends Hotline with a new
locality-sensitive coordination mechanism for improved
reaction times and two tunable iteration control
schemes for lower message costs. Our extensive
evaluation demonstrates that realistic performance and
cost of applications are highly sensitive to the
prevalent deployment, network, and environmental
characteristics. This highlights the importance of
Hotline, which provides user-configurable options to
trivially tune these metrics and thus affords time to
the developers for implementing, evaluating, and
comparing multiple algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "66",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Brandt:2014:PCS,
author = "Jens Brandt and Klaus Schneider and Yu Bai",
title = "Passive code in synchronous programs",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "67:1--67:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544387",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The synchronous model of computation requires that in
every step, inputs are read and outputs are
synchronously computed as the reaction of the program.
In addition, all internal variables are updated in
parallel even though not all of these values might be
required for the current and the future reaction steps.
To avoid unnecessary computations, we present a
compile-time optimization procedure that computes for
every variable a condition that determines whether its
value is required for current or future computations.
In this sense, our optimizations allow us to identify
passive code that can be disabled to avoid unnecessary
computations and therefore to reduce the reaction time
of programs or their energy consumption.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gu:2014:AES,
author = "Yu Gu and Liang He and Ting Zhu and Tian He",
title = "Achieving energy-synchronized communication in
energy-harvesting wireless sensor networks",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "68:1--68:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544388",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With advances in energy-harvesting techniques, it is
now feasible to build sustainable sensor networks to
support long-term applications. Unlike battery-powered
sensor networks, the objective of sustainable sensor
networks is to effectively utilize a continuous stream
of ambient energy. Instead of pushing the limits of
energy conservation, we aim to design
energy-synchronized schemes that keep energy supplies
and demands in balance. Specifically, this work
presents Energy-Synchronized Communication (ESC) as a
transparent middleware between the network layer and
MAC layer that controls the amount and timing of RF
activity at receiving nodes. In this work, we first
derive a delay model for cross-traffic at individual
nodes, which reveals an interesting stair effect. This
effect allows us to design a localized energy
synchronization control with $ O(d^3) $ time complexity
that shuffles or adjusts the working schedule of a node
to optimize cross-traffic delays in the presence of
changing duty cycle budgets, where $ d $ is the node degree
in the network. Under different rates of energy
fluctuations, shuffle-based and adjustment-based
methods have different influences on logical
connectivity and cross-traffic delay, due to the
inconsistent views of working schedules among
neighboring nodes before schedule updates. We study the
trade-off between them and propose methods for updating
working schedules efficiently. To evaluate our work,
ESC is implemented on MicaZ nodes with two
state-of-the-art routing protocols. Both testbed
experiment and large-scale simulation results show
significant performance improvements over randomized
synchronization controls.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "68",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2014:CFE,
author = "Jinkyu Lee and Arvind Easwaran and Insik Shin",
title = "Contention-free executions for real-time
multiprocessor scheduling",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "69:1--69:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2494530",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A time slot is defined as contention-free if the
number of jobs with remaining executions in the slot is
no larger than the number of processors, or contending,
otherwise. Then an important property holds that in any
contention-free slot, all jobs with remaining
executions are guaranteed to be scheduled as long as
the scheduler is work-conserving. This article aims at
improving schedulability by utilizing the
contention-free slots. To achieve this, this article
presents a policy (called CF policy) that moves some
job executions from contending slots to contention-free
ones. This policy can be employed by any
work-conserving, preemptive scheduling algorithm, and
we show that any algorithm extended with this policy
dominates the original algorithm in terms of
schedulability. We also present improved schedulability
tests for algorithms that employ this policy, based on
the observation that interference from jobs is reduced
when their executions are postponed to contention-free
slots. Simulation results demonstrate that the CF
policy, incorporated into existing algorithms,
significantly improves schedulability of those existing
algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "69",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2014:TMP,
author = "Huang Huang and Vivek Chaturvedi and Gang Quan and
Jeffrey Fan and Meikang Qiu",
title = "Throughput maximization for periodic real-time systems
under the maximal temperature constraint",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "70:1--70:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544390",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we study the problem of how to
maximize the throughput of a periodic real-time system
under a given peak temperature constraint. We assume
that different tasks in our system may have different
power and thermal characteristics. Two scheduling
approaches are presented. The first is built upon
processors that can be in either active or sleep mode.
By judiciously selecting tasks with different thermal
characteristics as well as alternating the processor's
active/sleep mode, the sleep period required to cool
down the processor is kept at a minimum level, and, as
the result, the throughput is maximized. We further
extend this approach for processors with dynamic
voltage/frequency scaling (DVFS) capability. Our
experiments on a large number of synthetic test cases
as well as real benchmark programs show that the
proposed methods not only consistently outperform the
existing approaches in terms of throughput
maximization, but also significantly improve the
feasibility of tasks when a more stringent temperature
constraint is imposed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "70",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Elewi:2014:EET,
author = "Abdullah Elewi and Mohamed Shalan and Medhat Awadalla
and Elsayed M. Saad",
title = "Energy-efficient task allocation techniques for
asymmetric multiprocessor embedded systems",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "71:1--71:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544391",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Asymmetric multiprocessor systems are considered
power-efficient multiprocessor architectures.
Furthermore, efficient task allocation (partitioning)
can achieve more energy efficiency at these asymmetric
multiprocessor platforms. This article addresses the
problem of energy-aware static partitioning of periodic
real-time tasks on asymmetric multiprocessor
(multicore) embedded systems. The article formulates
the problem according to the Dynamic Voltage and
Frequency Scaling (DVFS) model supported by the
platform and shows that it is an NP-hard problem. Then,
the article outlines optimal reference partitioning
techniques for each case of DVFS model with suitable
assumptions. Finally, the article proposes
modifications to the traditional bin-packing techniques
and designs novel techniques taking into account the
DVFS model supported by the platform. All algorithms
and techniques are simulated and compared. The
simulation shows promising results, where the proposed
techniques reduced the energy consumption by 75\%
compared to traditional methods when DVFS is not
supported and by 50\% when per-core DVFS is supported
by the platform.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "71",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Das:2014:EAT,
author = "Anup Das and Akash Kumar and Bharadwaj Veeravalli",
title = "Energy-aware task mapping and scheduling for reliable
embedded computing systems",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "72:1--72:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544392",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Task mapping and scheduling are critical in minimizing
energy consumption while satisfying the performance
requirement of applications enabled on heterogeneous
multiprocessor systems. An area of growing concern for
modern multiprocessor systems is the increase in the
failure probability of one or more component
processors. This is especially critical for
applications where performance degradation (e.g.,
throughput) directly impacts the quality of service
requirement. This article proposes a design-time
(offline) multi-criterion optimization technique for
application mapping on embedded multiprocessor systems
to minimize energy consumption for all processor
fault-scenarios. A scheduling technique is then
proposed based on self-timed execution to minimize the
schedule storage and construction overhead at runtime.
Experiments conducted with synthetic and real
applications from streaming and nonstreaming domains on
heterogeneous MPSoCs demonstrate that the proposed
technique minimizes energy consumption by 22\% and
design space exploration time by $ 100 \times $, while
satisfying the throughput requirement for all processor
fault-scenarios. For scalable throughput applications,
the proposed technique achieves 30\% better throughput
per unit energy, compared to the existing techniques.
Additionally, the self-timed execution-based scheduling
technique minimizes schedule construction time by 95\%
and storage overhead by 92\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "72",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2014:STN,
author = "Xiaohang Wang and Mei Yang and Yingtao Jiang and Peng
Liu and Masoud Daneshtalab and Maurizio Palesi and
Terrence Mak",
title = "On self-tuning networks-on-chip for dynamic
network-flow dominance adaptation",
journal = j-TECS,
volume = "13",
number = "2s",
pages = "73:1--73:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2544375.2544393",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Jan 28 17:34:43 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modern network-on-chip (NoC) systems are required to
handle complex runtime traffic patterns and
unprecedented applications. Data traffics of these
applications are difficult to fully comprehend at
design time so as to optimize the network design.
However, it has been discovered that the majority of
dataflows in a network are dominated by less than 10\%
of the specific pathways. In this article, we introduce
a method that is capable of identifying critical
pathways in a network at runtime and can then
dynamically reconfigure the network to optimize for
network performance subject to the identified dominated
flows. An online learning and analysis scheme is
employed to quickly discover the emerging dominated
traffic flows and provides a statistical traffic
prediction using regression analysis. The architecture
of a self-tuning network is also discussed which can be
reconfigured by setting up the identified
point-to-point paths for the dominance dataflows in
large traffic volumes. The merits of this new approach
are experimentally demonstrated using comprehensive NoC
simulations. Compared to the conventional network
architectures over a range of realistic applications,
the proposed self-tuning network approach can
effectively reduce the latency and power consumption by
as much as 25\% and 24\%, respectively. We also
evaluated the configuration time and additional
hardware cost. This new approach demonstrates the
capability of an adaptive NoC to handle more complex
and dynamic applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "73",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bournoutian:2013:AAA,
author = "Garo Bournoutian and Alex Orailoglu",
title = "Application-aware adaptive cache architecture for
power-sensitive mobile processors",
journal = j-TECS,
volume = "13",
number = "3",
pages = "41:1--41:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2539036.2539037",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Dec 18 19:07:39 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Today, mobile smartphones are expected to be able to
run the same complex, algorithm-heavy, memory-intensive
applications that were originally designed and coded
for general-purpose processors. All the while, it is
also expected that these mobile processors be
power-conscientious as well as of minimal area impact.
These devices pose unique usage demands of
ultra-portability but also demand an always-on,
continuous data access paradigm. As a result, this
dichotomy of continuous execution versus long battery
life poses a difficult challenge. This article explores
a novel approach to mitigating mobile processor power
consumption while abating any significant degradation
in execution speed. The concept relies on efficiently
leveraging both compile-time and runtime application
memory behavior to intelligently target adjustments in
the cache to significantly reduce overall processor
power, taking into account both the dynamic and leakage
power footprint of the cache subsystem. The simulation
results show a significant reduction in power
consumption of approximately 13\% to 29\%, while only
incurring a nominal increase in execution time and
area.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhou:2013:GOV,
author = "Bo Zhou and Kai Xiao and Danny Z. Chen and X. Sharon
Hu",
title = "{GPU}-optimized volume ray tracing for massive numbers
of rays in radiotherapy",
journal = j-TECS,
volume = "13",
number = "3",
pages = "42:1--42:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2539036.2539038",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Dec 18 19:07:39 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Ray tracing within a uniform grid volume is a
fundamental process invoked frequently by many
applications, especially radiation-dose calculation
methods in radiotherapy. However, the conflicting
features between the GPU memory architecture and the
memory-accessing patterns of volume ray tracing lead to
inefficient usage of GPU memory bandwidth and waste of
capability of modern GPUs. To improve the ray tracing
performance on GPU, we propose a lookup-table-based ray
tracing method which is specially optimized towards the
GPU memory system for processing a massive number of
rays. The proposed method is based on a key observation
that many of these applications normally involve a
massive number of rays, but their ray tracing may not
need to follow a specific execution order. Therefore,
we divide the 3D space into many regions (called
pyramids) and group together the rays falling into the
same pyramid. For each ray group, the volume is rotated
and resampled for their raytracing. This
divide-and-rotate strategy allows the memory access of
the ray tracing process to adopt a table-lookup
approach and leads to better memory coalescing on GPU.
Our proposed method was thoroughly evaluated in four
volume setups with randomly-generated rays. The
collapsed-cone convolution/superposition (CCCS) dose
calculation method is also implemented with/without the
proposed approach to verify the feasibility of our
method. Compared with the direct GPU implementation of
the popular 3DDDA algorithm, our method provides a
speedup in the range of 1.91--2.94X for the volume
settings we used. Major performance factors, including
ray origins, volume size, and pyramid size, are also
analyzed. The proposed technique was also found to be
able to give a speedup of 1.61--2.17X over the original
GPU implementation of the CCCS algorithm. Our
experiment results indicate that the proposed approach
is capable of offering better coalesced memory access
which eventually boosts the raytracing performance on
GPU. Moreover, our approach is conceptually simple and
can be readily included into various applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liang:2013:AAF,
author = "Yun Liang and Tulika Mitra",
title = "An analytical approach for fast and accurate design
space exploration of instruction caches",
journal = j-TECS,
volume = "13",
number = "3",
pages = "43:1--43:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2539036.2539039",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Dec 18 19:07:39 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Application-specific system-on-chip platforms create
the opportunity to customize the cache configuration
for optimal performance with minimal chip area.
Simulation, in particular trace-driven simulation, is
widely used to estimate cache hit rates. However,
simulation is too slow to be deployed in design space
exploration, especially when there are hundreds of
design points and the traces are huge. In this article,
we propose a novel analytical approach for design space
exploration of instruction caches. Given the program
control flow graph (CFG) annotated only with basic
block and control flow edge execution counts, we first
model the cache states at each point of the CFG in a
probabilistic manner. Then, we exploit the structural
similarities among related cache configurations to
estimate the cache hit rates for multiple cache
configurations in one pass. Experimental results
indicate that our analysis is 28--2,500 times faster
compared to the fastest known cache simulator while
maintaining high accuracy (0.2\% average error) in
estimating cache hit rates for a large set of popular
benchmarks. Moreover, compared to a state-of-the-art
cache design space exploration technique, our approach
achieves 304--8,086 times speedup and saves up to 62\%
(average 7\%) energy for the evaluated benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bourke:2013:AES,
author = "Timothy Bourke and Arcot Sowmya",
title = "Analyzing an embedded sensor with timed automata in
{Uppaal}",
journal = j-TECS,
volume = "13",
number = "3",
pages = "44:1--44:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2539036.2539040",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Dec 18 19:07:39 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "An infrared sensor is modeled and analyzed in Uppaal.
The sensor typifies the sort of component that
engineers regularly integrate into larger systems by
writing interface hardware and software. In all, three
main models are developed. In the first model, the
timing diagram of the sensor is interpreted and modeled
as a timed safety automaton. This model serves as a
specification for the complete system. A second model
that emphasizes the separate roles of driver and sensor
is then developed. It is validated against the timing
diagram model using an existing construction that
permits the verification of timed trace inclusion, for
certain models, by reachability analysis (i.e., model
checking). A transmission correctness property is also
stated by means of an auxiliary automaton and shown to
be satisfied by the model. A third model is created
from an assembly language driver program, using a
direct translation from the instruction set of a
processor with simple timing behavior. This model is
validated against the driver component of the second
timing diagram model using the timed trace inclusion
validation technique. The approach and its limitations
offer insight into the nature and challenges of
programming in real time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Collins:2013:FFS,
author = "Rebecca L. Collins and Luca P. Carloni",
title = "Flexible filters in stream programs",
journal = j-TECS,
volume = "13",
number = "3",
pages = "45:1--45:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2539036.2539041",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Dec 18 19:07:39 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The stream-processing model is a natural fit for
multicore systems because it exposes the inherent
locality and concurrency of a program and highlights
its separable tasks for efficient parallel
implementations. We present flexible filters, a
load-balancing optimization technique for stream
programs. Flexible filters utilize the programmability
of the cores in order to improve the data-processing
throughput of individual bottleneck tasks by
``borrowing'' resources from neighbors in the stream.
Our technique is distributed and scalable because all
runtime load-balancing decisions are based on
point-to-point handshake signals exchanged between
neighboring cores. Load balancing with flexible filters
increases the system-level processing throughput of
stream applications, particularly those with large
dynamic variations in the computational load of their
tasks. We empirically evaluate flexible filters in a
homogeneous multicore environment over a suite of five
real-world stream programs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hashemi:2013:TMF,
  author          = {Matin Hashemi and Mohammad H. Foroozannejad and Soheil
                     Ghiasi},
  title           = {Throughput-memory footprint trade-off in synthesis of
                     streaming software on embedded multiprocessors},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {46:1--46:??},
  month           = dec,
  year            = {2013},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2539036.2539042},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We study the trade-off between throughput and memory
                     footprint of embedded software that is synthesized from
                     acyclic static dataflow (task graph) specifications
                     targeting distributed memory multiprocessors. We
                     identify iteration overlapping as a knob in the
                     synthesis process by which one can trade application
                     throughput for its memory requirement. Given an initial
                     processor assignment and non-overlapped task schedule,
                     we formally present underlying properties of the
                     problem, such as constraints on a valid iteration
                     overlapping, maximum possible throughput, and minimum
                     memory footprint. Moreover, we develop an effective
                     algorithm for generation of a rich set of design points
                     that provide a range of trade-off options. Experimental
                     results on a number of applications and architectures
                     validate the effectiveness of our approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {46},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Biswas:2013:RTS,
  author          = {Swarnendu Biswas and Rajib Mall and Manoranjan
                     Satpathy},
  title           = {A regression test selection technique for embedded
                     software},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {47:1--47:??},
  month           = dec,
  year            = {2013},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2539036.2539043},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The current approaches for regression test selection
                     of embedded programs are usually based on data- and
                     control-dependency analyses, often augmented with human
                     reasoning. Existing techniques do not take into account
                     additional execution dependencies which may exist among
                     code elements in such programs due to features such as
                     tasks, task deadlines, task precedences, and intertask
                     communications. In this context, we propose a
                     model-based regression test selection technique for
                     such programs. Our technique first constructs a graph
                     model of the program; the proposed graph model has been
                     designed to capture several characteristics of embedded
                     programs, such as task precedence order, priority,
                     intertask communication, timers, exceptions and
                     interrupt handlers, which we consider important for
                     regression-test selection. Our regression test
                     selection technique selects test cases based on an
                     analysis of the constructed graph model. We have
                     implemented our technique to realize a prototype tool.
                     The experimental results obtained using this tool show
                     that, on average, our approach selects about 28.33\%
                     more regression test cases than those selected by a
                     traditional approach. We observed that, on average,
                     36.36\% of the fault-revealing test cases were
                     overlooked by the existing regression test selection
                     technique.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {47},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Majumdar:2013:TRO,
author = "Rupak Majumdar and Elaine Render and Paulo Tabuada",
title = "A theory of robust omega-regular software synthesis",
journal = j-TECS,
volume = "13",
number = "3",
pages = "48:1--48:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2539036.2539044",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Dec 18 19:07:39 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A key property for systems subject to uncertainty in
their operating environment is robustness: ensuring
that unmodeled but bounded disturbances have only a
proportionally bounded effect upon the behaviors of the
system. Inspired by ideas from robust control and
dissipative systems theory, we present a formal
definition of robustness as well as algorithmic tools
for the design of optimally robust controllers for $
\omega $-regular properties on discrete transition
systems. Formally, we define metric automata ---
automata equipped with a metric on states --- and
strategies on metric automata which guarantee
robustness for $ \omega $-regular properties. We
present fixed-point algorithms to construct optimally
robust strategies in polynomial time. In contrast to
strategies computed by classical graph theoretic
approaches, the strategies computed by our algorithm
ensure that the behaviors of the controlled system
gracefully degrade under the action of disturbances;
the degree of degradation is parameterized by the
magnitude of the disturbance. We show an application of
our theory to the design of controllers that tolerate
infinitely many transient errors provided they occur
infrequently enough.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{You:2013:EAC,
  author          = {Yi-Ping You and Shen-Hong Wang},
  title           = {Energy-aware code motion for {GPU} shader processors},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {49:1--49:??},
  month           = dec,
  year            = {2013},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2539036.2539045},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Graphics processing units (GPUs) are now being widely
                     adopted in system-on-a-chip designs, and they are often
                     used in embedded systems for manipulating computer
                     graphics or even for general-purpose computation.
                     Energy management is of concern to both hardware and
                     software designers. In this article, we present an
                     energy-aware code-motion framework for a compiler to
                     generate concentrated accesses to input and output
                     (I/O) buffers inside a GPU. Our solution attempts to
                     gather the I/O buffer accesses into clusters, thereby
                     extending the time period during which the I/O buffers
                     are clock or power gated. We performed experiments in
                     which the energy consumption was simulated by
                     incorporating our compiler-analysis and code-motion
                     framework into an in-house compiler tool. The
                     experimental results demonstrated that our mechanisms
                     were effective in reducing the energy consumption of
                     the shader processor by an average of 13.1\% and
                     decreasing the energy-delay product by 2.2\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {49},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Liu:2013:RAE,
  author          = {Tiantian Liu and Alex Orailoglu and Chun Jason Xue and
                     Minming Li},
  title           = {Register allocation for embedded systems to
                     simultaneously reduce energy and temperature on
                     registers},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {50:1--50:??},
  month           = dec,
  year            = {2013},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2539036.2539046},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Energy and thermal issues are two important concerns
                     for embedded system design. Diminished energy
                     dissipation leads to a longer battery life, while
                     reduced temperature hotspots decelerate the physical
                     failure mechanisms. The instruction fetch logic
                     associated with register access has a significant
                     contribution towards the total energy consumption.
                     Meanwhile, the register file has also been previously
                     shown to exhibit the highest temperature compared to
                     the rest of the components in an embedded processor.
                     Therefore, the optimization of energy and the
                     resolution of the thermal issue for register accesses
                     are of great significance. In this article, register
                     allocation techniques are studied to simultaneously
                     reduce energy consumption and heat buildup on register
                     accesses for embedded systems. Contrary to prevailing
                     intuition, we observe that optimizing energy and
                     optimizing temperature on register accesses conflict
                     with each other. We introduce a rotator hardware in the
                     instruction decoder to facilitate a balanced solution
                     for the two conflicting objectives. Algorithms for
                     register allocation and refinement are proposed based
                     on the access patterns and the effects of the rotator.
                     Experimental results show that the proposed algorithms
                     obtain notable improvements of energy and peak
                     temperature for embedded applications.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {50},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Lizarraga:2013:DPF,
  author          = {Adrian Lizarraga and Roman Lysecky and Susan Lysecky
                     and Ann Gordon-Ross},
  title           = {Dynamic profiling and fuzzy-logic-based optimization
                     of sensor network platforms},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {51:1--51:??},
  month           = dec,
  year            = {2013},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2539036.2539047},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The commercialization of sensor-based platforms is
                     facilitating the realization of numerous sensor network
                     applications with diverse application requirements.
                     However, sensor network platforms are becoming
                     increasingly complex to design and optimize due to the
                     multitude of interdependent parameters that must be
                     considered. To further complicate matters, application
                     experts oftentimes are not trained engineers, but
                     rather biologists, teachers, or agriculturists who wish
                     to utilize the sensor-based platforms for various
                     domain-specific tasks. To assist both platform
                     developers and application experts, we present a
                     centralized dynamic profiling and optimization platform
                     for sensor-based systems that enables application
                     experts to rapidly optimize a sensor network for a
                     particular application without requiring extensive
                     knowledge of, and experience with, the underlying
                     physical hardware platform. In this article, we present
                     an optimization framework that allows developers to
                     characterize application requirements through
                     high-level design metrics and fuzzy-logic-based
                     optimization. We further analyze the benefits of
                     utilizing dynamic profiling information to eliminate
                     the guesswork of creating a ``good'' benchmark, present
                     several reoptimization evaluation algorithms used to
                     detect if re-optimization is necessary, and highlight
                     the benefits of the proposed dynamic optimization
                     framework compared to static optimization
                     alternatives.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {51},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Ben-Asher:2013:BUV,
author = "Yosi Ben-Asher and Nadav Rotem",
title = "The benefits of using variable-length pipelined
operations in high-level synthesis",
journal = j-TECS,
volume = "13",
number = "3",
pages = "52:1--52:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2539036.2539048",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Dec 18 19:07:39 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Current high-level synthesis systems synthesize
arithmetic units of a fixed known number of stages, and
the scheduler mainly determines when units are
activated. We focus on scheduling techniques for the
high-level synthesis of pipelined arithmetic units
where the number of stages of these operations is a
free parameter of the synthesis. This problem is
motivated by the ability to automatically create
pipelined functional units, such as multipliers, with
different pipe lengths. These units have different
characteristics in terms of parallelism level, clock
latency, frequency, etc. This article presents the
Variable-length Pipeline Scheduler (VPS). The ability
to synthesize variable-length pipelined units expands
the known scheduling problem of high-level synthesis to
include a search for a minimal number of hardware units
(operations) and their desired number of stages. The
proposed search procedure is based on algorithms that
find a local minima in a $d$-dimensional grid, thus
avoiding the need to evaluate all possible points in
the space. We have implemented a C language compiler
for VPS targeting FPGAs. Our results demonstrate that
using variable-length pipeline units can reduce the
overall resource usage and improve the execution time
when synthesized onto an FPGA. The proposed search is
sufficiently fast, taking only a few seconds, allowing
an interactive mode of work. A comparison with xPilot
shows a significant saving of hardware resources while
maintaining comparable execution times of the resulting
circuits. This work is an extension of a previous paper
[Ben-Asher and Rotem 2008].",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "52",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2013:RDD,
  author          = {Yu-Ming Chang and Pi-Cheng Hsiu and Yuan-Hao Chang and
                     Che-Wei Chang},
  title           = {A resource-driven {DVFS} scheme for smart handheld
                     devices},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {53:1--53:??},
  month           = dec,
  year            = {2013},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2539036.2539049},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Reducing the energy consumption of the emerging genre
                     of smart handheld devices while simultaneously
                     maintaining mobile applications and services is a major
                     challenge. This work is inspired by an observation on
                     the resource usage patterns of mobile applications. In
                     contrast to existing DVFS scheduling algorithms and
                     history-based prediction techniques, we propose a
                     resource-driven DVFS scheme in which resource state
                     machines are designed to model the resource usage
                     patterns in an online fashion to guide DVFS. We have
                     implemented the proposed scheme on Android smartphones
                     and conducted experiments based on real-world
                     applications. The results are very encouraging and
                     demonstrate the efficacy of the proposed scheme.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {53},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Kyrkou:2013:HAR,
  author          = {Christos Kyrkou and Christos Ttofis and Theocharis
                     Theocharides},
  title           = {A hardware architecture for real-time object detection
                     using depth and edge information},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {54:1--54:??},
  month           = dec,
  year            = {2013},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2539036.2539050},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Emerging embedded 3D vision systems for robotics and
                     security applications utilize object detection to
                     perform video analysis in order to intelligently
                     interact with their host environment and take
                     appropriate actions. Such systems have high performance
                     and high detection-accuracy demands, while requiring
                     low energy consumption, especially when dealing with
                     embedded mobile systems. However, there is a large
                     image search space involved in object detection,
                     primarily because of the different sizes in which an
                     object may appear, which makes it difficult to meet
                     these demands. Hence, it is possible to meet such
                     constraints by reducing the search space involved in
                     object detection. To this end, this article proposes a
                     depth and edge accelerated search method and a
                     dedicated hardware architecture that implements it to
                     provide an efficient platform for generic real-time
                     object detection. The hardware integration of depth and
                     edge processing mechanisms, with a support vector
                     machine classification core onto an FPGA platform,
                     results in significant speed-ups and improved detection
                     accuracy. The proposed architecture was evaluated using
                     images of various sizes, with results indicating that
                     the proposed architecture is capable of achieving
                     real-time frame rates for a variety of image sizes (271
                     fps for 320 $ \times $ 240, 42 fps for 640 $ \times $
                     480, and 23 fps for 800 $ \times $ 600) compared to
                     existing works, while reducing the false-positive rate
                     by 52\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {54},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Chang:2013:ALC,
  author          = {Li-Pin Chang and Tung-Yang Chou and Li-Chun Huang},
  title           = {An adaptive, low-cost wear-leveling algorithm for
                     multichannel solid-state disks},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {55:1--55:??},
  month           = dec,
  year            = {2013},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2539036.2539051},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Multilevel flash memory cells double or even triple
                     storage density, producing affordable solid-state disks
                     for end users. As flash memory endures only limited
                     program-erase cycles, solid-state disks employ
                     wear-leveling methods to prevent any portions of flash
                     memory from being retired prematurely. Modern
                     solid-state disks must consider wear evenness at both
                     block and channel levels. This study first presents a
                     block-level wear-leveling method whose design has two
                     new ideas. First, the proposed method reuses the
                     intelligence available in flash-translation layers so
                     it does not require any new data structures. Second, it
                     adaptively tunes the threshold of block-level wear
                     leveling according to the runtime write pattern. This
                     study further introduces a new channel-level
                     wear-leveling strategy, because block-level wear
                     leveling is confined to a channel, but realistic
                     workloads do not evenly write all channels. The
                     proposed method swaps logical blocks among channels for
                     achieving an eventually-even state of channel
                     lifetimes. A series of trace-driven simulations show
                     that our wear-leveling method outperforms existing
                     approaches in terms of wear evenness and overhead
                     reduction.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {55},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Shukla:2014:EES,
  author          = {Sandeep K. Shukla},
  title           = {Editorial: Embedded systems --- more than
                     methodology},
  journal         = j-TECS,
  volume          = {13},
  number          = {3s},
  pages           = {99:1--99:??},
  month           = mar,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2587894},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {99},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Daneshtalab:2014:ESI,
  author          = {Masoud Daneshtalab and Maurizio Palesi and Juha
                     Plosila},
  title           = {Editorial: Special issue on design challenges for
                     many-core processors},
  journal         = j-TECS,
  volume          = {13},
  number          = {3s},
  pages           = {100:1--100:??},
  month           = mar,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2567941},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {100},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Koohi:2014:TSL,
  author          = {Somayyeh Koohi and Yawei Yin and Shaahin Hessabi and
                     S. J. Ben Yoo},
  title           = {Towards a scalable, low-power all-optical architecture
                     for networks-on-chip},
  journal         = j-TECS,
  volume          = {13},
  number          = {3s},
  pages           = {101:1--101:??},
  month           = mar,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2567930},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article proposes a scalable wavelength-routed
                     optical Network on Chip (NoC) based on the Spidergon
                     topology, named Power-efficient Scalable
                     Wavelength-routed Network-on-chip (PeSWaN). The key
                     idea of the proposed all-optical architecture is the
                     utilization of per-receiver wavelengths in the data
                     network to prevent network contention and the adoption
                     of per-sender wavelengths in the control network to
                     avoid end-point contention. By performing a series of
                     simulations, we study the efficiency of the proposed
                     architecture, its power and energy consumption, and the
                     data transmission delay. Moreover, we compare the
                     proposed architecture with electrical NoCs and
                     alternative ONoC architectures under various traffic
                     patterns.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {101},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Lhuillier:2014:HHA,
author = "Yves Lhuillier and Maroun Ojail and Alexandre Guerre
and Jean-Marc Philippe and Karim {Ben Chehida} and Farhat
Thabet and Caaliph Andriamisaina and Chafic Jaber and
Rapha{\"e}l David",
title = "{HARS}: a hardware-assisted runtime software for
embedded many-core architectures",
journal = j-TECS,
volume = "13",
number = "3s",
pages = "102:1--102:??",
month = mar,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2517311",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Mar 24 17:17:02 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The current trend in embedded computing consists in
increasing the number of processing resources on a
chip. Following this paradigm, cluster-based many-core
accelerators with a shared hierarchical memory have
emerged. Handling synchronizations on these
architectures is critical since parallel
implementations speed-ups of embedded applications
strongly depend on the ability to exploit the largest
possible number of cores while limiting task management
overhead. This article presents the combination of a
low-overhead complete runtime software and a flexible
hardware accelerator for synchronizations called HARS
(Hardware-Assisted Runtime Software). Experiments on a
multicore test chip showed that the hardware
accelerator for synchronizations has less than 1\% area
overhead compared to a cluster of the chip while
reducing synchronization latencies (up to 2.8 times
compared to a test-and-set implementation) and
contentions. The runtime software part offers basic
features like memory management but also optimized
execution engines to allow the easy and efficient
extraction of the parallelism in applications with
multiple programming models. By using the hardware
acceleration as well as a very low overhead task
scheduling software technique, we show that HARS
outperforms an optimized state-of-the-art task
scheduler by 13\% for the execution of a parallel
application.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "102",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yang:2014:CTR,
  author          = {Qiang Yang and Jian Fu and Raphael Poss and Chris
                     Jesshope},
  title           = {On-chip traffic regulation to reduce coherence
                     protocol cost on a microthreaded many-core architecture
                     with distributed caches},
  journal         = j-TECS,
  volume          = {13},
  number          = {3s},
  pages           = {103:1--103:??},
  month           = mar,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2567931},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {When hardware cache coherence scales to many cores on
                     chip, over saturated traffic of the shared memory
                     system may offset the benefit from massive hardware
                     concurrency. In this article, we investigate the cost
                     of a write-update protocol in terms of on-chip memory
                     network traffic and its adverse effects on the system
                     performance based on a multithreaded many-core
                     architecture with distributed caches. We discuss
                     possible software and hardware solutions to alleviate
                     the network pressure. We find that in the context of
                     massive concurrency, by introducing a write-merging
                     buffer with 0.46\% area overhead to each core,
                     applications with good locality and concurrency are
                     boosted up by 18.74\% in performance on average. Other
                     applications also benefit from this addition and even
                     achieve a throughput increase of 5.93\%. In addition,
                     this improvement indicates that higher levels of
                     concurrency per core can be exploited without impacting
                     performance, thus tolerating latency better and giving
                     higher processor efficiencies compared to other
                     solutions.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {103},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Parikh:2014:FCF,
  author          = {Ritesh Parikh and Valeria Bertacco},
  title           = {{ForEVeR}: a complementary formal and runtime
                     verification approach to correct {NoC} functionality},
  journal         = j-TECS,
  volume          = {13},
  number          = {3s},
  pages           = {104:1--104:??},
  month           = mar,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2514871},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {As silicon technology scales, modern processor and
                     embedded systems are rapidly shifting towards complex
                     chip multi-processor (CMP) and system-on-chip (SoC)
                     designs. As a side effect of complexity of these
                     designs, ensuring their correctness has become
                     increasingly problematic. Within these domains,
                     Network-on-Chips (NoCs) are a de-facto choice to
                     implement on-chip interconnect; their design is quickly
                     becoming extremely complex in order to keep up with
                     communication performance demands. As a result, design
                     errors in the NoC may go undetected and escape into the
                     final silicon. In this work, we propose ForEVeR, a
                     solution that complements the use of formal methods and
                     runtime verification to ensure functional correctness
                     in NoCs. Formal verification, due to its scalability
                     limitations, is used to verify smaller modules, such as
                     individual router components. To deliver correctness
                     guarantees for the complete network, we propose a
                     network-level detection and recovery solution that
                     monitors the traffic in the NoC and protects it against
                     escaped functional bugs. To this end, ForEVeR augments
                     the baseline NoC with a lightweight checker network
                     that alerts destination nodes of incoming packets ahead
                     of time. If a bug is detected, flagged by missed packet
                     arrivals, our recovery mechanism delivers the in-flight
                     data safely to the intended destination via the checker
                     network. ForEVeR's experimental evaluation shows that
                     it can recover from NoC design errors at only 4.9\%
                     area cost for an $ 8 \times 8 $ mesh interconnect, over
                     a time interval ranging from 0.5K to 30K cycles per
                     recovery event, and it incurs no performance overhead
                     in the absence of errors. ForEVeR can also protect NoC
                     operations against soft-errors: a growing concern with
                     the scaling of silicon. ForEVeR leverages the same
                     monitoring hardware to detect soft-error
                     manifestations, in addition to design-errors. Recovery
                     of the soft-error affected packets is guaranteed by
                     building resiliency features into our checker network.
                     ForEVeR incurs minimal performance penalty up to a flit
                     error rate of 0.01\% in lightly loaded networks.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {104},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{DelBarrio:2014:ULP,
  author          = {Alberto A. {Del Barrio} and Nader Bagherzadeh and
                     Rom{\'a}n Hermida},
  title           = {Ultra-low-power adder stage design for exascale
                     floating point units},
  journal         = j-TECS,
  volume          = {13},
  number          = {3s},
  pages           = {105:1--105:??},
  month           = mar,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2567932},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Currently, the most powerful supercomputers can
                     provide tens of petaflops. Future many-core systems are
                     estimated to provide an exaflop. However, the power
                     budget limitation makes these machines still infeasible
                     and unaffordable. Floating Point Units (FPUs) are
                     critical from both the power consumption and
                     performance points of view of today's microprocessors
                     and supercomputers. Literature offers very different
                     designs. Some of them are focused on increasing
                     performance no matter the penalty, and others on
                     decreasing power at the expense of lower performance.
                     In this article, we propose a novel approach for
                     reducing the power of the FPU without degrading the
                     rest of parameters. Concretely, this power reduction is
                     also accompanied by an area reduction and a performance
                     improvement. Hence, an overall energy gain will be
                     produced. According to our experiments, our proposed
                     unit consumes 17.5\%, 23\% and 16.5\% less energy for
                     single, double and quadruple precision, with an
                     additional 15\%, 21.5\% and 14.5\% delay reduction,
                     respectively. Furthermore, area is also diminished by
                     4\%, 4.5\% and 5\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {105},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Huang:2014:YES,
  author          = "Yu-Jen Huang and Jin-Fu Li",
  title           = "Yield-enhancement schemes for multicore processor and
                     memory stacked {$3$D ICs}",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "106:1--106:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567933",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "A three-dimensional (3D) integrated circuit (IC) with
                     multiple dies vertically connected by
                     through-silicon-via (TSV) offers many benefits over
                     current 2D ICs. Multicore logic-memory die stacking has
                     been considered as one candidate for 3D ICs by
                     utilizing the TSV to provide high data bandwidth
                     between logic and memory. However, 3D ICs suffer from
                     the low-yield issue. This article proposes effective
                     yield-enhancement techniques for multicore die-stacked
                     3D ICs. Two reconfiguration schemes are proposed to
                     logically swap the positions of cores in the dies of 3D
                     ICs such that the yield of 3D ICs is increased. Two
                     algorithms also are proposed to determine the
                     reconfiguration effectively. Simulation results show
                     that the proposed reconfiguration schemes can achieve a
                     yield gain ranging from 1\% to 11\%.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "106",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Arnold:2014:TPH,
  author          = "Oliver Arnold and Emil Matus and Benedikt Noethen and
                     Markus Winter and Torsten Limberg and Gerhard
                     Fettweis",
  title           = "{Tomahawk}: Parallelism and heterogeneity in
                     communications signal processing {MPSoCs}",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "107:1--107:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2517087",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Heterogeneity and parallelism in MPSoCs for 4G (and
                     beyond) communications signal processing are inevitable
                     in order to meet stringent power constraints and
                     performance requirements. The question arises on how to
                     cope with the problem of system programmability and
                     runtime management incurred by the statically or even
                     dynamically varying number and type of processing
                     elements. This work addresses this challenge by
                     proposing the concept of a heterogeneous many-core
                     platform called Tomahawk. Apart from the definition of
                     the system architecture, in this approach a unified
                     framework including a model of computation, a
                     programming interface and a dedicated runtime
                     management unit called CoreManager is proposed. The
                     increase of system complexity in terms of application
                     parallelism and number of resources may lead to a
                     dramatic increase of the management costs, hence
                     causing performance degradation. For this reason, the
                     efficient implementation of the CoreManager becomes a
                     major issue in system design. This work compares the
                     performance and capabilities of various CoreManager
                     HW/SW solutions, based on ASIC, RISC and ASIP
                     paradigms. The results demonstrate that the proposed
                     ASIP-based solution approaches the performance of the
                     ASIC realization, while preserving the full flexibility
                     of the software (RISC-based) implementation.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "107",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Jin:2014:PPA,
  author          = "Yuho Jin and Timothy Mark Pinkston",
  title           = "{PAIS}: Parallelism-aware interconnect scheduling in
                     multicores",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "108:1--108:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567934",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Multicore processors have the potential to deliver
                     scalable performance by distributing computation across
                     multiple cores. However, the communication cost of
                     parallel application thread execution may significantly
                     limit the performance achievable due to latency and
                     contention on shared resources in the on-chip network
                     of multicores experienced by packets from critical
                     threads. We present PAIS, Parallelism-Aware
                     Interconnect Scheduling, that bolsters performance and
                     energy efficiency of parallel applications. PAIS
                     dynamically detects thread execution progress based on
                     communication latency and scheduling, and it
                     accelerates communication for slowly executing threads
                     by prioritizing packets from those threads with flow
                     control and priority-based arbitration.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "108",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Casu:2014:UMI,
  author          = "Mario R. Casu and Francesco Colonna and Marco Crepaldi
                     and Danilo Demarchi and Mariagrazia Graziano and
                     Maurizio Zamboni",
  title           = "{UWB} microwave imaging for breast cancer detection:
                     Many-core, {GPU}, or {FPGA?}",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "109:1--109:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2530534",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "An UWB microwave imaging system for breast cancer
                     detection consists of antennas, transceivers, and a
                     high-performance embedded system for elaborating the
                     received signals and reconstructing breast images. In
                     this article we focus on this embedded system. To
                     accelerate the image reconstruction, the Beamforming
                     phase has to be implemented in a parallel fashion. We
                     assess its implementation in three currently available
                     high-end platforms based on a multicore CPU, a GPU, and
                     an FPGA, respectively. We then project the results
                     applying technology scaling rules to future many-core
                     CPUs, many-thread GPUs, and advanced FPGAs. We consider
                     an optimistic case in which available resources
                     increase according to Moore's law only, and a
                     pessimistic case in which only a fraction of those
                     resources are available due to a limited power budget.
                     In both scenarios, an implementation that includes a
                     high-end FPGA outperforms the other alternatives. Since
                     the number of effectively usable cores in future
                     many-cores will be power-limited, and there is a trend
                     toward the integration of power-efficient accelerators,
                     we conjecture that a chip consisting of a many-core
                     section and a reconfigurable logic section will be the
                     perfect platform for this application.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "109",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Palesi:2014:ESS,
  author          = "Maurizio Palesi and Todor Stefanov",
  title           = "Editorial: Special Section on {ESTIMedia'13}",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "110:1--110:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567942",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "110",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2014:EOR,
  author          = "Gang Chen and Kai Huang and Alois Knoll",
  title           = "Energy optimization for real-time multiprocessor
                     system-on-chip with optimal {DVFS} and {DPM}
                     combination",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "111:1--111:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567935",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Energy optimization is a critical design concern for
                     embedded systems. Combining DVFS+DPM is considered as
                     one preferable technique to reduce energy consumption.
                     There have been optimal DVFS+DPM algorithms for
                     periodic independent tasks running on uniprocessor in
                     the literature. Optimal combination of DVFS and DPM for
                     periodic dependent tasks on multicore systems is
                     however not yet reported. The challenge of this problem
                     is that the idle intervals of cores are not easy to
                     model. In this article, a novel technique is proposed
                     to directly model the idle intervals of individual
                     cores such that both DVFS and DPM can be optimized at
                     the same time. Based on this technique, the energy
                     optimization problem is formulated by means of mixed
                     integrated linear programming. We also present
                     techniques to prune the exploration space of the
                     formulation. Experimental results using real-world
                     benchmarks demonstrate the effectiveness of our
                     approach compared to existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "111",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Abdel-Khalek:2014:PSP,
  author          = "Rawan Abdel-Khalek and Valeria Bertacco",
  title           = "Post-silicon platform for the functional diagnosis and
                     debug of networks-on-chip",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "112:1--112:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567936",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The increasing number of units in today's
                     systems-on-chip and multicore processors has led to
                     complex intra-chip communication solutions.
                     Specifically, Networks-on-Chip (NoCs) have emerged as a
                     favorable fabric to provide high bandwidth and low
                     latency in connecting many units in a same chip. To
                     achieve these goals, the NoC often includes complex
                     components and advanced features, leading to the
                     development of large and highly complex interconnect
                     subsystems. One of the biggest challenges in these
                     designs is to ensure the correct functionality of this
                     communication infrastructure. To support this goal, an
                     increasing fraction of the validation effort has
                     shifted to post-silicon validation, because it permits
                     exercising network activities that are too complex to
                     be validated in pre-silicon. However, post-silicon
                     validation is hindered by the lack of observability of
                     the network's internal operations and thus, diagnosing
                     functional errors during this phase is very difficult.
                     In this work, we propose a post-silicon validation
                     platform that improves observability of network
                     operations by taking periodic snapshots of the traffic
                     traversing the network. Each node's local cache is
                     configured to temporarily store the snapshot logs in a
                     designated area reserved for post-silicon validation
                     and relinquished after product release. Each snapshot
                     log is analyzed locally by a software algorithm running
                     on its corresponding core, in order to detect
                     functional errors. Upon error detection, all snapshot
                     logs are aggregated at a central location to extract
                     additional debug data, including an overview of network
                     traffic surrounding the error event, as well as a
                     partial reconstruction of the routes followed by
                     packets in flight at the time. In our experiments, we
                     found that this approach allows us to detect several
                     types of functional errors, as well as observe, on
                     average, over 50\% of the network's traffic and
                     reconstruct at least half of each of their routes
                     through the network.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "112",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Dasari:2014:NCA,
  author          = "Dakshina Dasari and Borislav Nikoli{\'c} and Vincent
                     N{\'e}lis and Stefan M. Petters",
  title           = "{NoC} contention analysis using a branch-and-prune
                     algorithm",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "113:1--113:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567937",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "``Many-core'' systems based on a Network-on-Chip (NoC)
                     architecture offer various opportunities in terms of
                     performance and computing capabilities, but at the same
                     time they pose many challenges for the deployment of
                     real-time systems, which must fulfill specific timing
                     requirements at runtime. It is therefore essential to
                     identify, at design time, the parameters that have an
                     impact on the execution time of the tasks deployed on
                     these systems and the upper bounds on the other key
                     parameters. The focus of this work is to determine an
                     upper bound on the traversal time of a packet when it
                     is transmitted over the NoC infrastructure. Towards
                     this aim, we first identify and explore some
                     limitations in the existing recursive-calculus-based
                     approaches to compute the Worst-Case Traversal Time
                     (WCTT) of a packet. Then, we extend the existing model
                     by integrating the characteristics of the tasks that
                     generate the packets. For this extended model, we
                     propose an algorithm called ``Branch and Prune'' (BP).
                     Our proposed method provides tighter and safe estimates
                     than the existing recursive-calculus-based approaches.
                     Finally, we introduce a more general approach, namely
                     ``Branch, Prune and Collapse'' (BPC) which offers a
                     configurable parameter that provides a flexible
                     trade-off between the computational complexity and the
                     tightness of the computed estimate. The
                     recursive-calculus methods and BP present two special
                     cases of BPC when a trade-off parameter is $1$ or $
                     \infty $, respectively. Through simulations, we analyze
                     this trade-off, reason about the implications of
                     certain choices, and also provide some case studies to
                     observe the impact of task parameters on the WCTT
                     estimates.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "113",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Lashgar:2014:HHI,
  author          = "Ahmad Lashgar and Ahmad Khonsari and Amirali
                     Baniasadi",
  title           = "{HARP}: {Harnessing inActive thReads in many-core
                     Processors}",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "114:1--114:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567938",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "SIMT accelerators are equipped with thousands of
                     computational resources. Conventional accelerators,
                     however, fail to fully utilize available resources due
                     to branch and memory divergences. This underutilization
                     is manifested in two underlying inefficiencies:
                     pipeline width underutilization and pipeline depth
                     underutilization. Width underutilization occurs when
                     SIMD execution units are not entirely utilized due to
                     branch divergences. This affects lane activity and
                     results in SIMD inefficiency. Depth underutilization
                     takes place when the pipeline runs out of active
                     threads and is forced to leave pipeline stages idle.
                     This work addresses both inefficiencies by harnessing
                     inactive threads available to the pipeline. We
                     introduce Harnessing inActive thReads in many-core
                     Processors (or simply HARP) to improve width and depth
                     utilization in accelerators. We show how using inactive
                     yet ready threads can enhance performance. Moreover, we
                     investigate implementation details and study
                     microarchitectural changes needed to build a
                     HARP-enhanced accelerator. Furthermore, we evaluate
                     HARP under a variety of microarchitectural design
                     points. We measure the area overhead associated with
                     HARP and compare to conventional alternatives. Under
                     Fermi-like GPUs, we show that HARP provides 10\%
                     speedup on average (maximum of 1.6X) at the cost of
                     3.5\% area overhead. Our analysis shows that HARP
                     performs better under narrower SIMD and shorter
                     pipelines.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "114",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Banaiyanmofrad:2014:NBF,
  author          = "Abbas Banaiyanmofrad and Gustavo Gir{\~a}o and Nikil
                     Dutt",
  title           = "{NoC}-based fault-tolerant cache design in chip
                     multiprocessors",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "115:1--115:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567939",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Advances in technology scaling increasingly make
                     emerging Chip MultiProcessor (CMP) platforms more
                     susceptible to failures that cause various reliability
                     challenges. In such platforms, error-prone on-chip
                     memories (caches) continue to dominate the chip area.
                     Also, Network-on-Chip (NoC) fabrics are increasingly
                     used to manage the scalability of these architectures.
                     We present a novel solution for efficient
                     implementation of fault-tolerant design of Last-Level
                     Cache (LLC) in CMP architectures. The proposed approach
                     leverages the interconnection network fabric to protect
                     the LLC cache banks against permanent faults in an
                     efficient and scalable way. During an LLC access to a
                     faulty block, the network detects and corrects the
                     faults, returning the fault-free data to the requesting
                     core. Leveraging the NoC interconnection fabric,
                     designers can implement any cache fault-tolerant scheme
                     in an efficient, modular, and scalable manner for
                     emerging multicore/manycore platforms. We propose four
                     different policies for implementing a remapping-based
                     fault-tolerant scheme leveraging the NoC fabric in
                     different settings. The proposed policies enable design
                     trade-offs between NoC traffic (packets sent through
                     the network) and the intrinsic parallelism of these
                     communication mechanisms, allowing designers to tune
                     the system based on design constraints. We perform an
                     extensive design space exploration on NoC benchmarks to
                     demonstrate the usability and efficacy of our approach.
                     In addition, we perform sensitivity analysis to observe
                     the behavior of various policies in reaction to
                     improvements in the NoC architecture. The overheads of
                     leveraging the NoC fabric are minimal: on an 8-core,
                     16-cache-bank CMP we demonstrate reliable access to
                     LLCs with additional overheads of less than 3\% in area
                     and less than 7\% in power.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "115",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Bahirat:2014:MHP,
  author          = "Shirish Bahirat and Sudeep Pasricha",
  title           = "{METEOR}: Hybrid photonic ring-mesh network-on-chip
                     for multicore architectures",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "116:1--116:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2567940",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "With increasing application complexity and
                     improvements in process technology, Chip
                     MultiProcessors (CMPs) with tens to hundreds of cores
                     on a chip are becoming a reality. Networks-on-Chip
                     (NoCs) have emerged as scalable communication fabrics
                     that can support high bandwidths for these massively
                     parallel multicore systems. However, traditional
                     electrical NoC implementations still need to overcome
                     the challenges of high data transfer latencies and
                     large power consumption. On-chip photonic interconnects
                     with high performance-per-watt characteristics have
                     recently been proposed as an alternative to address
                     these challenges for intra-chip communication. In this
                     article, we explore using low-cost photonic
                     interconnects on a chip to enhance traditional
                     electrical NoCs. Our proposed hybrid photonic ring-mesh
                     NoC (METEOR) utilizes a configurable photonic ring
                     waveguide coupled to a traditional 2D electrical mesh
                     NoC. Experimental results indicate a strong motivation
                     to consider the proposed architecture for future CMPs,
                     as it can provide about 5$ \times $ reduction in power
                     consumption and improved throughput and access
                     latencies, compared to traditional electrical 2D mesh
                     and torus NoC architectures. Compared to other
                     previously proposed hybrid photonic NoC fabrics such as
                     the hybrid photonic torus, Corona, and Firefly, our
                     proposed fabric is also shown to have lower photonic
                     area overhead, power consumption, and energy-delay
                     product, while maintaining competitive throughput and
                     latency.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "116",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Foglia:2014:ERI,
  author          = "Pierfrancesco Foglia and Marco Solinas",
  title           = "Exploiting replication to improve performances of
                     {NUCA-based} {CMP} systems",
  journal         = j-TECS,
  volume          = "13",
  number          = "3s",
  pages           = "117:1--117:??",
  month           = mar,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2566568",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Mar 24 17:17:02 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Improvements in semiconductor nanotechnology made chip
                     multiprocessors the reference architecture for
                     high-performance microprocessors. CMPs usually adopt
                     large Last-Level Caches (LLC) shared among cores and
                     private L1 caches, whose performances depend on the
                     wire-delay dominated response time of LLC. NUCA
                     (NonUniform Cache Architecture) caches represent a
                     viable solution for tolerating wire-delay effects. In
                     this article, we present Re-NUCA, a NUCA cache that
                     exploits replication of blocks inside the LLC to avoid
                     performance limitations of D-NUCA caches due to
                     conflicting access to shared data. Results show that a
                     Re-NUCA LLC permits to improve performances of more
                     than 5\% on average, and up to 15\% for applications
                     that strongly suffer from conflicting access to shared
                     data, while reducing network traffic and power
                     consumption with respect to D-NUCA caches. Besides, it
                     outperforms different S-NUCA schemes optimized with
                     victim replication.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "117",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2014:EEE,
  author          = "Sandeep K. Shukla",
  title           = "Editorial: Embedded everywhere for everyone",
  journal         = j-TECS,
  volume          = "13",
  number          = "4",
  pages           = "74:1--74:??",
  month           = feb,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2559122",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Tue Mar 11 18:33:06 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "74",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Lam:2014:REC,
  author          = "Siew-Kei Lam and Thambipillai Srikanthan and
                     Christopher T. Clarke",
  title           = "Rapid evaluation of custom instruction selection
                     approaches with {FPGA} estimation",
  journal         = j-TECS,
  volume          = "13",
  number          = "4",
  pages           = "75:1--75:??",
  month           = feb,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2560014",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Tue Mar 11 18:33:06 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "The main aim of this article is to demonstrate that a
                     fast and accurate FPGA estimation engine is
                     indispensable in design flows for custom instruction
                     (template) selection. The need for a FPGA estimation
                     engine stems from the difficulty in predicting the FPGA
                     performance measures of selected custom instructions.
                     We will present a FPGA estimation technique that
                     partitions the high-level representation of custom
                     instructions into clusters based on the structural
                     organization of the target FPGA, while taking into
                     account general logic synthesis principles adopted by
                     FPGA tools. In this work, we have evaluated a widely
                     used graph covering algorithm with various heuristics
                     for custom instruction selection. In addition, we
                     present an algorithm called Refined Largest Fit First
                     (RLFF) that relies on a graph covering heuristic to
                     select non-overlapping superset templates, which
                     typically incorporate frequently used basic templates.
                     The initial solution is further refined by considering
                     overlapping templates that were ignored previously to
                     see if their introduction could lead to higher
                     performance. While RLFF provides the most efficient
                     cover compared to the ILP method and other graph
                     covering heuristics, FPGA estimation results reveals
                     that RLFF leads to the worst performance in certain
                     applications. It is therefore a worthy proposition to
                     equip design flows with accurate FPGA estimation in
                     order to rapidly determine the most profitable custom
                     instruction approach for a given application.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "75",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Maggio:2014:TSC,
  author          = "Martina Maggio and Federico Terraneo and Alberto
                     Leva",
  title           = "Task scheduling: a control-theoretical viewpoint for a
                     general and flexible solution",
  journal         = j-TECS,
  volume          = "13",
  number          = "4",
  pages           = "76:1--76:??",
  month           = feb,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2560015",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Tue Mar 11 18:33:06 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "This article presents a new approach to the design of
                     task scheduling algorithms, where system-theoretical
                     methodologies are used throughout. The proposal implies
                     a significant perspective shift with respect to
                     mainstream design practices, but yields large payoffs
                     in terms of simplicity, flexibility, solution
                     uniformity for different problems, and possibility to
                     formally assess the results also in the presence of
                     unpredictable run-time situations. A complete
                     implementation example is illustrated, together with
                     various comparative tests, and a methodological
                     treatise of the matter.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "76",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Dong:2014:EEE,
  author          = "Wei Dong and Yunhao Liu and Chun Chen and Lin Gu and
                     Xiaofan Wu",
  title           = "{Elon}: Enabling efficient and long-term reprogramming
                     for wireless sensor networks",
  journal         = j-TECS,
  volume          = "13",
  number          = "4",
  pages           = "77:1--77:??",
  month           = feb,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2560017",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Tue Mar 11 18:33:06 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "We present a new mechanism called Elon for enabling
                     efficient and long-term reprogramming in wireless
                     sensor networks. Elon reduces the transferred code size
                     significantly by introducing the concept of replaceable
                     component. It avoids the cost of hardware reboot with a
                     novel software reboot mechanism. Moreover, it
                     significantly prolongs the reprogrammable lifetime
                     (i.e., the time period during which the sensor nodes
                     can be reprogrammed) by avoiding flash writes for
                     TelosB nodes. Experimental results show that Elon
                     transfers up to 120--389 times less information than
                     Deluge, and 18--42 times less information than Stream.
                     The software reboot mechanism that Elon applies reduces
                     the rebooting cost by 50.4\%--53.87\% in terms of
                     beacon packets, and 56.83\% in terms of unsynchronized
                     nodes. In addition, Elon prolongs the reprogrammable
                     lifetime by a factor of 3.3.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "77",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2014:BAM,
  author          = "Shuai Li and Yuesheng Lou and Bo Liu",
  title           = "{Bluetooth} aided mobile phone localization: a
                     nonlinear neural circuit approach",
  journal         = j-TECS,
  volume          = "13",
  number          = "4",
  pages           = "78:1--78:??",
  month           = feb,
  year            = "2014",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2560018",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Tue Mar 11 18:33:06 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "It is meaningful to design a strategy to roughly
                     localize mobile phones without a GPS by exploiting
                     existing conditions and devices especially in
                     environments without GPS availability (e.g., tunnels,
                     subway stations, etc.). The availability of Bluetooth
                     devices for most phones and the existence of a number
                     of GPS equipped phones in a crowd of phone users enable
                     us to design a Bluetooth aided mobile phone
                     localization strategy. With the position of GPS
                     equipped phones as beacons, and with the Bluetooth
                     connection between neighbor phones as proximity
                     constraints, we formulate the problem into an
                     inequality problem defined on the Bluetooth network. A
                     recurrent neural network is developed to solve the
                     problem distributively in real time. The convergence of
                     the neural network and the solution feasibility to the
                     defined problem are both theoretically proven. The
                     hardware implementation architecture of the proposed
                     neural network is also given in this article. As
                     applications, rough localizations of drivers in a
                     tunnel and localization of customers in a supermarket
                     are explored and simulated. Simulations demonstrate the
                     effectiveness of the proposed method.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "78",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Hu:2014:MON,
author = "Jingtong Hu and Qingfeng Zhuge and Chun Jason Xue and
Wei-Che Tseng and Edwin H.-M. Sha",
title = "Management and optimization for nonvolatile
memory-based hybrid scratchpad memory on multicore
embedded processors",
journal = j-TECS,
volume = "13",
number = "4",
pages = "79:1--79:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560019",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The recent emergence of various Non-Volatile Memories
(NVMs), with many attractive characteristics such as
low leakage power and high-density, provides us with a
new way of addressing the memory power consumption
problem. In this article, we target embedded CMPs, and
propose a novel Hybrid Scratch Pad Memory (HSPM)
architecture which consists of SRAM and NVM to take
advantage of the ultra-low leakage power, high density
of NVM, and fast access of SRAM. A novel data
allocation algorithm as well as an algorithm to
determine the NVM/SRAM ratio for the novel HSPM
architecture are proposed. The experimental results
show that the data allocation algorithm can reduce the
memory access time by 33.51\% and the dynamic energy
consumption by 16.81\% on average for the HSPM
architecture when compared with a greedy algorithm. The
NVM/SRAM size determination algorithm can further
reduce the memory access time by 14.7\% and energy
consumption by 20.1\% on average.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2014:MBM,
author = "Heeseok Kim and Dong-Guk Han and Seokhie Hong and
Jaecheol Ha",
title = "Message blinding method requiring no multiplicative
inversion for {RSA}",
journal = j-TECS,
volume = "13",
number = "4",
pages = "80:1--80:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560020",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article proposes a new message blinding method
requiring no multiplicative inversion for RSA. Most
existing message blinding methods for RSA additionally
require the multiplicative inversion, even though
computational complexity of this operation is $ O(n^3)
$ which is equal to that of the exponentiation. Thus,
this additional operation is known to be the main
drawback of the existing message blinding methods for
RSA. In addition to requiring no additional
multiplicative inversion, our new countermeasure
provides the security against various power analysis
attacks as well as general differential power
analysis.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "80",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mahdavikhah:2014:MFP,
author = "Behzad Mahdavikhah and Ramin Mafi and Shahin
Sirouspour and Nicola Nicolici",
title = "A multiple-{FPGA} parallel computing architecture for
real-time simulation of soft-object deformation",
journal = j-TECS,
volume = "13",
number = "4",
pages = "81:1--81:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560031",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Hardware-based parallel computing is proposed for
acceleration of finite-element (FE) analysis of linear
elastic deformation models. An implementation of the
Preconditioned Conjugate Gradient algorithm on N Field
Programmable Gate Array (FPGA) devices solves the large
linear system of equations arising from the FE
discretization. The system employs a large number of
customized fixed-point computing units with a
high-throughput memory architecture. An implementation
of this scalable architecture on four Altera EP3SE110
FPGA devices yields a peak performance of 604 Giga
Operations per second. This enables haptic simulation
of a 3-dimensional deformable object of 21000 elements
at an update rate of 400Hz.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "81",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Axer:2014:BTP,
author = "Philip Axer and Rolf Ernst and Heiko Falk and Alain
Girault and Daniel Grund and Nan Guan and Bengt Jonsson
and Peter Marwedel and Jan Reineke and Christine
Rochange and Maurice Sebastian and Reinhard {Von
Hanxleden} and Reinhard Wilhelm and Wang Yi",
title = "Building timing predictable embedded systems",
journal = j-TECS,
volume = "13",
number = "4",
pages = "82:1--82:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560033",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A large class of embedded systems is distinguished
from general-purpose computing systems by the need to
satisfy strict requirements on timing, often under
constraints on available resources. Predictable system
design is concerned with the challenge of building
systems for which timing requirements can be guaranteed
a priori. Perhaps paradoxically, this problem has
become more difficult by the introduction of
performance-enhancing architectural elements, such as
caches, pipelines, and multithreading, which introduce
a large degree of uncertainty and make guarantees
harder to provide. The intention of this article is to
summarize the current state of the art in research
concerning how to build predictable yet performant
systems. We suggest precise definitions for the concept
of ``predictability'', and present predictability
concerns at different abstraction levels in embedded
system design. First, we consider timing predictability
of processor instruction sets. Thereafter, we consider
how programming languages can be equipped with
predictable timing semantics, covering both a
language-based approach using the synchronous
programming paradigm, as well as an environment that
provides timing semantics for a mainstream programming
language (in this case C). We present techniques for
achieving timing predictability on multicores. Finally,
we discuss how to handle predictability at the level of
networked embedded systems where randomly occurring
errors must be considered.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "82",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bathen:2014:ERC,
author = "Luis Angel D. Bathen and Nikil D. Dutt",
title = "Embedded {RAIDs}-on-chip for bus-based
chip-multiprocessors",
journal = j-TECS,
volume = "13",
number = "4",
pages = "83:1--83:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2533316",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The dual effects of larger die sizes and technology
scaling, combined with aggressive voltage scaling for
power reduction, increase the error rates for on-chip
memories. Traditional on-chip memory reliability
techniques (e.g., ECC) incur significant power and
performance overheads. In this article, we propose a
low-power-and-performance-overhead Embedded RAID
(E-RAID) strategy and present Embedded RAIDs-on-Chip
(E-RoC), a distributed dynamically managed reliable
memory subsystem for bus-based Chip-Multiprocessors.
E-RoC achieves reliability through redundancy by
optimizing RAID-like policies tuned for on-chip
distributed memories. We achieve on-chip reliability of
memories through the use of Distributed Dynamic
ScratchPad Allocatable Memories (DSPAMs) and their
allocation policies. We exploit aggressive voltage
scaling to reduce power consumption overheads due to
parallel DSPAM accesses, and rely on the E-RoC Manager
to automatically handle any resulting
voltage-scaling-induced errors. We demonstrate how
E-RAIDs can further enhance the fault tolerance of
traditional memory reliability approaches by designing
E-RAID levels that exploit ECC. Finally, we show the
power and flexibility of the E-RoC concept by showing
the benefits of having heterogeneous E-RAID levels
that fit each application's needs (fault tolerance,
power/energy, performance). Our experimental results on
CHStone/Mediabench II benchmarks show that our E-RAID
levels converge to 100\% error-free data rates much
faster than traditional ECC approaches. Moreover,
E-RAID levels that exploit ECC can guarantee 99.9\%
error-free data rates at ultra low Vdd on average,
whereas traditional ECC approaches were able to attain
at most 99.1\% error-free data rates. We observe an
average of 22\% dynamic power consumption increase by
using traditional ECC approaches with respect to the
baseline (non-voltage scaled SPMs), whereas our E-RAID
levels are able to save dynamic power consumption by an
average of 27\% (w.r.t. the same non-voltage scaled
SPMs baseline), while incurring worst-case 2\% higher
performance overheads than traditional ECC approaches.
By voltage scaling the memories, we see that
traditional ECC approaches are able to save static
energy by 6.4\% (average), whereas our E-RAID
approaches achieve 23.4\% static energy savings
(average). Finally, we observe that mixing E-RAID
levels allows us to further reduce the dynamic power
consumption by up to 55.5\% at the cost of an average
5.6\% increase in execution time over traditional
approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "83",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Logaras:2014:PAE,
author = "Evangelos Logaras and Orsalia G. Hazapis and Elias S.
Manolakos",
title = "{Python} to accelerate embedded {SoC} design: a case
study for systems biology",
journal = j-TECS,
volume = "13",
number = "4",
pages = "84:1--84:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560032",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present SysPy (System Python) a tool which exploits
the strengths of the popular Python scripting language
to boost design productivity of embedded System on
Chips for FPGAs. SysPy acts as a ``glue'' software
between mature HDLs, ready-to-use VHDL components and
programmable processor soft IP cores. SysPy can be used
to: (i) automatically translate hardware components
described in Python into synthesizable VHDL, (ii)
capture top-level structural descriptions of
processor-centric SoCs in Python, (iii) implement all
the steps necessary to compile the user's C code for an
instruction set processor core and generate processor
specific Tcl scripts that import to the design project
all the necessary HDL files of the processor's
description and instantiate/connect the core to other
blocks in a synthesizable top-level Python description.
Moreover, we have developed a Hardware Abstraction
Layer (HAL) in Python which allows user applications
running in a host PC to utilize effortlessly the SoC's
resources in the FPGA. SysPy's design capabilities,
when complemented with the developed HAL software API,
provide all the necessary tools for hw/sw partitioning
and iterative design for efficient SoC's performance
tuning. We demonstrate how SysPy's design flow and
functionalities can be used by building a
processor-centric embedded SoC for computational
systems biology. The designed SoC, implemented using a
Xilinx Virtex-5 FPGA, combines the flexibility of a
programmable soft processor core (Leon3) with the high
performance of an application specific core to simulate
flexibly and efficiently the stochastic behavior of
large size biomolecular reaction networks. Such
networks are essential for studying the dynamics of
complex biological systems consisting of multiple
interacting pathways.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "84",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rodrigues:2014:LPI,
author = "Rance Rodrigues and Arunachalam Annamalai and Sandip
Kundu",
title = "A low-power instruction replay mechanism for design of
resilient microprocessors",
journal = j-TECS,
volume = "13",
number = "4",
pages = "85:1--85:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560034",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "There is a growing concern about the increasing rate
of defects in computing substrates. Traditional
redundancy solutions prove to be too expensive for
commodity microprocessor systems. Modern
microprocessors feature multiple execution units to
take advantage of instruction level parallelism.
However, most workloads do not exhibit the level of
instruction level parallelism that a typical
microprocessor is resourced for. This offers an
opportunity to reexecute instructions using idle
execution units. But, relying solely on idle resources
will not provide full instruction coverage and there is
a need to explore other alternatives. To that end, we
propose and evaluate two instruction replay schemes
within the same core for online testing of the
execution units. One scheme (RER) reexecutes only the
retired instructions, while the other (REI) reexecutes
all the issued instructions. The complete proposed
solution requires a comparator and minor modifications
to control logic, resulting in negligible hardware
overhead. Both soft and hard error detection are
considered and the performance and energy impact of
both schemes are evaluated and compared against
previously proposed redundant execution schemes.
Results show that even though the proposed schemes
result in a small performance penalty when compared to
previous work, the energy overhead is significantly
reduced.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "85",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tavana:2014:SHT,
author = "Mohammad Khavari Tavana and Nasibeh Teimouri and
Meisam Abdollahi and Maziar Goudarzi",
title = "Simultaneous hardware and time redundancy with online
task scheduling for low energy highly reliable
standby-sparing system",
journal = j-TECS,
volume = "13",
number = "4",
pages = "86:1--86:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2523781/2560035",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Standby-sparing is one of the common techniques in
order to design fault-tolerant safety-critical systems
where the high level of reliability is needed.
Recently, the minimization of energy consumption in
embedded systems has attracted a lot of concerns.
Simultaneous considering of high reliability and low
energy consumption by DVS is a challenging problem in
designing such a system, since using DVS has been shown
to reduce the reliability profoundly. In this article,
we have studied different schemes of standby-sparing
systems from the energy consumption and reliability
point of view. Moreover, we propose a new
standby-sparing scheme which addresses both reliability
and energy consumption jointly together. This scheme
uses a simple energy management coupled with an online
task scheduler which tries to dispatch those ready
tasks which are expected to lead to high reliability
and low energy consumption in the system. The
effectiveness of the proposed scheme has been shown on
TGFF under stochastic workloads. The results show 52\%
improvement on energy saving compared to the
conventional hot standby-sparing system. Moreover, two
orders of magnitude higher reliability is obtained on
average, while preserving the same level of energy
saving as compared to the state-of-the-art low-energy
standby-sparing system (LESS).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Riemens:2014:TSA,
author = "Danny P. Riemens and Georgi N. Gaydadjiev and Chris I.
de Zeeuw and Christos Strydis",
title = "Towards scalable arithmetic units with graceful
degradation",
journal = j-TECS,
volume = "13",
number = "4",
pages = "87:1--87:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2499367",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a new family of scalable
arithmetic units (ScAUs) targeting
resource-constrained, embedded devices. We, first,
study the performance, power, area and scalability
properties of general adders. Next, suitable
error-detection schemes for low-power embedded systems
are discussed. As a result, our ScAUs are enhanced with
a suitable error-detection scheme, resulting in a
Parity-Checked ScAU (PCScAU) design. The PCScAU strikes
a flexible trade-off between space and time redundancy,
offering dependability similar to high-end techniques
for the area and power cost of low-end approaches. An
alternative design, the Precision-Scalable Arithmetic
Unit (PScAU) maintains throughput with degraded
precision in case of hardware failures. The PScAU is
targeting dependable applications where latency rather
than numerical accuracy is more important. The PScAU's
downscaled mode is also interesting for runtime thermal
management due to its advantageous power consumption.
We implemented and synthesized the PCScAU, PScAU and a
few important reference designs (double-, triple- and
quadruple-modular-redundancy adders with/without input
gating) in 90-nm UMC technology. Overall, the PCScAU
ranks first in 9 out of 10 power-delay-area
(PDA)-product variants. It exhibits 16\% area savings
and 12\% performance speedup for 7\% increase in total
power consumption, compared to the cheapest form of
conventional hardware replication with the same fault
coverage. The PDA product of the PCScAU is, thus,
reduced by 21\%. It is interesting that, while total
power slightly increases, the PCScAU static power in
fact decreases by 14\%. Therefore, for newer technology
nodes where the static power component is significant,
the PCScAU can also achieve --- next to performance and
area --- significant power improvements.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Park:2014:AWL,
author = "Sung Kyu Park and Min Kyu Maeng and Ki-Woong Park and
Kyu Ho Park",
title = "Adaptive wear-leveling algorithm for {PRAM} main
memory with a {DRAM} buffer",
journal = j-TECS,
volume = "13",
number = "4",
pages = "88:1--88:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2558427",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Phase Change RAM (PRAM) is a candidate to replace DRAM
main memory due to its low idle power consumption and
high scalability. However, its latency and endurance
have generated problems in fulfilling its main memory
role. The latency can be treated with a DRAM buffer,
but the endurance problem remains, with three critical
points that need to be improved despite the use of
existing wear-leveling algorithms. First, existing DRAM
buffering schemes do not consider write count
distribution. Second, swapping and shifting operations
are performed statically. Finally, swapping and
shifting operations are loosely coupled with a DRAM
buffer. As a remedy to these drawbacks, we propose an
adaptive wear-leveling algorithm that consists of three
novel schemes for PRAM main memory with a DRAM buffer.
The PRAM-aware DRAM buffering scheme reduces the write
count and prevents skewed writing by considering the
write count and clean data based on the least recently
used (LRU) scheme. The adaptive multiple swapping and
shifting scheme makes the write count even with the
dynamic operation timing, the number of swapping pages
being based on the workload pattern. Our DRAM
buffer-aware swapping and shifting scheme reduces
overhead by curbing additional swapping and shifting
operations, thus reducing unnecessary write operations.
To evaluate the wear-leveling effect, we have
implemented a PIN-based wear-leveling simulator. The
evaluation confirms that the PRAM lifetime increases
from 0.68 years with the previous wear-leveling
algorithm to 5.32 years with the adaptive wear-leveling
algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Anjum:2014:TTA,
author = "Omer Anjum and Mubashir Ali and Teemu Pitk{\"a}nen and
Jari Nurmi",
title = "Transport triggered architecture to perform carrier
synchronization for {LTE}",
journal = j-TECS,
volume = "13",
number = "4",
pages = "89:1--89:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560036",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article implementation of carrier frequency
offset estimate for 20MHz LTE baseband processing is
discussed. LTE (Long Term Evolution) is a wireless
communication standard that makes use of some
innovative techniques to gain very high data rates
({$>$100Mbps}). This goal for such a high throughput
also imposes design challenges for the industry and
academia such as in the case of handheld mobile devices
where the power budget is very limited. Implicitly high
throughput means we need more computation power and
more energy. On the other hand industry is also
struggling for a flexible hardware solution, or a
software defined radio (SDR), to amortize the huge
cost of required hardware changes as the wireless
standards have kept evolving. Design innovations are
now needed to confront those challenges of low power
and flexible design without changing the hardware. The
implementation is made on Transport Triggered
Architecture (TTA), which is a unique concept in
computer architecture design, based on the single
instruction, ``MOVE''. The power consumption of the
architecture when synthesized on 180nm technology at
180MHz and 1.8V is 18.39mW. The total area occupied
excluding memory is 0.6mm$^2$. The proposed TTA
solution has been compared with a more ASIC
(application specific integrated circuits)-like ASIP
(application specific instruction processor) solution
and a coprocessor accelerator-based solution. The
proposed solution is more flexible: easily programmable
due to high level language support, easily scalable,
and still efficient in energy consumption needed to
complete the CFO (carrier frequency offset) estimation
task. Because of these attractive characteristics, TTA
is also a potential candidate for SDR platforms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "89",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Clemente:2014:AMR,
author = "Juan Antonio Clemente and Javier Resano and Daniel
Mozos",
title = "An approach to manage reconfigurations and reduce area
cost in hard real-time reconfigurable systems",
journal = j-TECS,
volume = "13",
number = "4",
pages = "90:1--90:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560037",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a methodology for building
real-time reconfigurable systems that ensures that all
the temporal constraints of a set of applications are
met while optimizing the utilization of the available
reconfigurable resources. Starting from a static
platform that meets all the real-time deadlines, our
approach takes advantage of runtime reconfiguration in
order to reduce the area needed while guaranteeing that
all the deadlines are still met. This goal is achieved
by identifying which tasks must be always ready for
execution in order to meet the deadlines and by means
of a methodology that also allows reducing the area
requirements.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "90",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dewan:2014:BAF,
author = "Farhana Dewan and Nathan Fisher",
title = "Bandwidth allocation for fixed-priority-scheduled
compositional real-time systems",
journal = j-TECS,
volume = "13",
number = "4",
pages = "91:1--91:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560038",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recent research in compositional real-time systems has
focused on determination of a component's real-time
interface parameters. An important objective in
interface-parameter determination is minimizing the
bandwidth allocated to each component of the system
while simultaneously guaranteeing component
schedulability. With this goal in mind, in this
article, we explore fixed-priority schedulability in
a compositional setting. First we derive an efficient
exact test based on iterative convergence for sporadic
task systems scheduled by fixed-priority (e.g.,
deadline monotonic, rate monotonic) upon an
explicit-deadline periodic (EDP) resource. Then we
address the time complexity of the exact test by
developing a fully-polynomial-time approximation scheme
(FPTAS) for allocating bandwidth to components. Our
parametric algorithm takes the task system and an
accuracy parameter $ \epsilon > 0 $ as input and
returns a bandwidth which is guaranteed to be at most a
factor $ (1 + \epsilon) $ times the optimal minimum
bandwidth required to successfully schedule the task
system. We perform thorough simulation over
synthetically generated task systems to compare the
performance of our proposed efficient-exact and the
approximate algorithm and observe a significant
decrease in runtime and a very small relative error
when comparing the approximate algorithm with the exact
algorithm and the sufficient algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "91",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2014:EIE,
author = "I-Wei Wu and Jean Jyh-Jiun Shann and Wei-Chung Hsu and
Chung-Ping Chung",
title = "Extended Instruction Exploration for Multiple-Issue
Architectures",
journal = j-TECS,
volume = "13",
number = "4",
pages = "92:1--92:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560039",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In order to satisfy the growing demand for
high-performance computing in modern embedded devices,
several architectural and microarchitectural
enhancements have been implemented in processor
architectures. Extended instruction (EI) is often used
for architectural enhancement, while issuing multiple
instructions is a common approach for
microarchitectural enhancement. The impact of combining
both of these approaches in the same design is not well
understood. While previous studies have shown that EI
can potentially improve performance in some
applications on certain multiple-issue architectures,
the algorithms used to identify EI for multiple-issue
architectures yield only limited performance
improvement. This is because not all arithmetic
operations are suited for EI for multiple-issue
architectures. To explore the full potential of EI for
multiple-issue architectures, two important factors
need to be considered: (1) the execution performance of
an application is dominated by critical (located on the
critical path) and highly resource-contentious (i.e.,
having a high probability of being delayed during
execution due to hardware resource limitations)
operations, and (2) an operation may become critical
and/or highly resource contentious after some
operations are added to the EI. This article presents
an EI exploration algorithm for multiple-issue
architectures that focuses on these two factors.
Simulation results show that the proposed algorithm
outperforms previously published algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "92",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Moussalli:2014:SPX,
author = "Roger Moussalli and Mariam Salloum and Robert Halstead
and Walid Najjar and Vassilis J. Tsotras",
title = "A study on parallelizing {XML} path filtering using
accelerators",
journal = j-TECS,
volume = "13",
number = "4",
pages = "93:1--93:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560040",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Mar 11 18:33:06 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Publish-subscribe systems present the state of the art
in information dissemination to multiple users. Such
systems have evolved from simple topic-based to the
current XML-based systems. XML-based pub-sub systems
provide users with more flexibility by allowing the
formulation of complex queries on the content as well
as the structure of the streaming messages. Messages
that match a given user query are forwarded to the
user. This article examines how to exploit the
parallelism found in XPath filtering. Using an incoming
XML stream, parsing and matching thousands of user
profiles are performed simultaneously by matching
engines. We show the benefits and trade-offs of mapping
the proposed filtering approach onto FPGAs, processing
streams of XML at wire speed, and GPUs, providing the
flexibility of software. This is in contrast to
conventional approaches bound by the sequential aspect
of software computing, associated with a large memory
footprint. By converting XPath expressions into custom
stacks, our solution is the first to provide support
for complex XPath structural constructs, such as
parent-child and ancestor-descendant relations, whilst
allowing wildcarding and recursion. The measured
speedups resulting from the GPU and FPGA accelerations
versus single-core CPUs are up to 6.6X and 2.5 orders
of magnitude, respectively. The FPGA approaches are up
to 31X faster than software running on 12 CPU cores.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "93",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2014:PRR,
  author =       "Hengchang Liu and Pan Hui and Zhiheng Xie and Jingyuan
                  Li and David Siu and Gang Zhou and Liusheng Huang and
                  John A. Stankovic",
  title =        "Providing reliable and real-time delivery in the
                  presence of body shadowing in breadcrumb systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "94:1--94:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2557633",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The primary goal of breadcrumb trail sensor networks
                  is to transmit in real-time users' physiological
                  parameters that measure life-critical functions to an
                  incident commander through reliable multihop
                  communication. In applications using breadcrumb
                  solutions, there are often many users working together,
                  and this creates a well-known body shadowing effect
                  (BSE). In this article, we first measure the
                  characteristics of body shadowing for 2.4GHz sensor
                  nodes. Our empirical results show that the body
                  shadowing effect leads to severe packet loss and
                  consequently very poor real-time performance. Then we
                  develop a novel Intentional Forwarding solution. This
                  solution accurately detects the shadowing mode and
                  enables selected neighbors to forward data packets.
                  Experimental results from a fully implemented testbed
                  demonstrate that Intentional Forwarding is able to
                  improve the end-to-end average packet delivery ratio
                  (PDR) from 58\% to 93\% and worst-case PDR from 45\% to
                  85\%, and is able to meet soft real-time requirements
                  even under severe body shadowing problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "94",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Gal:2014:GLC,
  author =       "Bertrand {Le Gal} and Christophe Jego",
  title =        "{GPU-like} on-chip system for decoding {LDPC} codes",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "95:1--95:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2538668",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Rapid prototyping is an important step in the
                  development and the verification of computationally
                  demanding tasks of digital communication systems, such
                  as Forward Error Correction (FEC) decoding. The goal is
                  to replace time-consuming simulations based on abstract
                  models of the system with real-time experiments under
                  real-world conditions. GPU-like architecture is a
                  promising approach to fully exploit the potential of
                  FPGA-based acceleration platforms. In this article, an
                  application-specific GPU-like architecture and a
                  complete compilation framework for decoding LDPC codes
                  are proposed. The interest in an application-specific
                  GPU in comparison with current GPUs is detailed.
                  Finally, real-time experimentations demonstrate the
                  potential of the GPU-like decoder to investigate both
                  algorithmic and architectural issues.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "95",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Khan:2014:OLT,
  author =       "Umair Ali Khan and Bernhard Rinner",
  title =        "Online learning of timeout policies for dynamic power
                  management",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "96:1--96:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2529992",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dynamic power management (DPM) refers to strategies
                  which selectively change the operational states of a
                  device during runtime to reduce the power consumption
                  based on the past usage pattern, the current workload,
                  and the given performance constraint. The power
                  management problem becomes more challenging when the
                  workload exhibits nonstationary behavior which may
                  degrade the performance of any single or static DPM
                  policy. This article presents a reinforcement learning
                  (RL)-based DPM technique for optimal selection of
                  timeout values in the different device states. Each
                  timeout period determines how long the device will
                  remain in a particular state before the transition
                  decision is taken. The timeout selection is based on
                  workload estimates derived from a Multilayer Artificial
                  Neural Network (ML-ANN) and an objective function given
                  by weighted performance and power parameters. Our DPM
                  approach is further able to adapt the power-performance
                  weights online to meet user-specified power and
                  performance constraints, respectively. We have
                  completely implemented our DPM algorithm on our
                  embedded traffic surveillance platform and performed
                  long-term experiments using real traffic data to
                  demonstrate the effectiveness of the DPM. Our results
                  show that the proposed learning algorithm not only
                  adequately explores the power-performance trade-off
                  with nonstationary workload but can also successfully
                  perform online adjustment of the trade-off parameter in
                  order to meet the user-specified constraint.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "96",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Gong:2014:SBF,
  author =       "Lingkan Gong and Oliver Diessel",
  title =        "Simulation-based functional verification of
                  dynamically reconfigurable systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "97:1--97:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2560042",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dynamically reconfigurable systems (DRS) implemented
                  using field-programmable gate arrays (FPGAs) allow
                  hardware logic to be partially reconfigured while the
                  rest of the design continues to operate. By mapping
                  multiple reconfigurable hardware modules to the same
                  physical region of an FPGA, such systems are able to
                  time-multiplex their modules at runtime and adapt
                  themselves to changing execution requirements. This
                  architectural flexibility introduces challenges for
                  verifying system functionality. New simulation
                  approaches are required to extend traditional
                  simulation techniques to assist designers in testing
                  and debugging the time-varying behavior of DRS. This
                  article summarizes our previous work on ReSim, the
                  first tool to allow cycle-accurate yet physically
                  independent simulation of a DRS reconfiguring both its
                  logic and state. Furthermore, ReSim-based simulation
                  does not require changing the design for simulation
                  purposes and thereby verifies the implementation-ready
                  design instead of a variation of the design. We discuss
                  the conflicting requirements of simulation accuracy and
                  verification productivity in verifying DRS designs and
                  describe our approach to resolve this challenge.
                  Through a range of case studies, we demonstrate that
                  ReSim assists designers in detecting fabric-independent
                  bugs of DRS designs and helps to achieve verification
                  closure of DRS design projects.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "97",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Guimbretiere:2014:ADP,
  author =       "Fran{\c{c}}ois Guimbreti{\`e}re and Shenwei Liu and
                  Han Wang and Rajit Manohar",
  title =        "An asymmetric dual-processor architecture for
                  low-power information appliances",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "98:1--98:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2560538",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As users become increasingly conscious of their energy
                  footprint---either to improve battery life or to respect
                  the environment---improved energy efficiency of systems
                  has gained in importance. This is especially important
                  in the context of information appliances such as e-book
                  readers that are meant to replace books, since their
                  energy efficiency impacts how long the appliance can be
                  used on a single charge of the battery. In this
                  article, we present a new software and hardware
                  architecture for information appliances that provides
                  significant advantages in terms of device lifetime. The
                  architecture combines a low-power microcontroller with
                  a high-performance application processor, where the
                  low-power microcontroller is used to handle simple user
                  interactions (e.g., turning pages, inking, entering
                  text) without waking up the main application processor.
                  We demonstrate how this architecture is easily adapted
                  to the traditional way of building user interfaces
                  using a user interface markup language. We report on
                  our initial measurements using an E Ink-based
                  prototype. When comparing our hybrid architecture to a
                  simpler solution we found that we can increase the
                  battery life by a factor of 1.72 for a reading task and
                  by a factor of 3.23 for a writing task. We conclude by
                  presenting design guidelines aimed at optimizing the
                  overall energy signature of information appliances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Anonymous:2014:AOS,
  author =       "Anonymous",
  title =        "Abstracts: Online Supplements Volume 13, Number 1s
                  Volume 13, Number 2s Volume 13, Number 3s Volume 13,
                  Number 4s Volume 13, Number 5s",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "99:1--99:??",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2688494.2688495",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 5 18:52:55 MST 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "99",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Natale:2014:ESI,
  author =       "Marco {Di Natale} and Rich West and Jian-Jia Chen and
                  Rahul Mangharam",
  title =        "Editorial: Special issue on real-time and embedded
                  technology and applications",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "119:1--119:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2588608",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "119",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Whitham:2014:ERC,
  author =       "Jack Whitham and Neil C. Audsley and Robert I. Davis",
  title =        "Explicit reservation of cache memory in a predictable,
                  preemptive multitasking real-time system",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "120:1--120:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2523070",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We describe and evaluate explicit reservation of cache
                  memory to reduce the cache-related preemption delay
                  (CRPD) observed when tasks share a cache in a
                  preemptive multitasking hard real-time system. We
                  demonstrate the approach using measurements obtained
                  from a hardware prototype, and present schedulability
                  analyses for systems that share a cache by explicit
                  reservation. These analyses form the basis for a series
                  of experiments to further evaluate the approach. We
                  find that explicit reservation is most useful for
                  larger task sets with high utilization. Some task sets
                  cannot be scheduled with a conventional cache, but are
                  schedulable with explicit reservation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "120",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Nirjon:2014:MSR,
  author =       "Shahriar Nirjon and Angela Nicoara and Cheng-Hsin Hsu
                  and Jatinder Pal Singh and John A. Stankovic",
  title =        "{MultiNets}: a system for real-time switching between
                  multiple network interfaces on mobile devices",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "121:1--121:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2489788",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "MultiNets is a system supporting seamless switch-over
                  between wireless interfaces on mobile devices in
                  real-time. MultiNets is configurable to run in three
                  different modes: (i) Energy Saving mode---for choosing
                  the interface that saves the most energy based on the
                  condition of the device, (ii) Offload mode---for
                  offloading data traffic from the cellular to WiFi
                  network, and (iii) Performance mode---for selecting the
                  network for the fastest data connectivity. MultiNets
                  also provides a powerful API that gives the application
                  developers: (i) the choice to select a network
                  interface to communicate with a specific server, and
                  (ii) the ability to simultaneously transfer data over
                  multiple network interfaces. MultiNets is modular,
                  easily integrable, lightweight, and applicable to
                  various mobile operating systems. We implement
                  MultiNets on Android devices as a show case. MultiNets
                  does not require any extra support from the network
                  infrastructure and runs existing applications
                  transparently. To evaluate MultiNets, we first collect
                  data traces from 13 actual Android smartphone users
                  over three months. We then use the collected traces to
                  show that, by automatically switching to WiFi whenever
                  it is available, MultiNets can offload on average
                  79.82\% of the data traffic. We also illustrate that,
                  by optimally switching between the interfaces,
                  MultiNets can save on average 21.14 kJ of energy per
                  day, which is equivalent to 27.4\% of the daily energy
                  usage. Using our API, we demonstrate that a video
                  streaming application achieves 43--271\% higher
                  streaming rate when concurrently using WiFi and 3G
                  interfaces. We deploy MultiNets in a real-world
                  scenario and our experimental results show that
                  depending on the user requirements, it outperforms the
                  state-of-the-art Android system either by saving up to
                  33.75\% energy, achieving near-optimal offloading, or
                  achieving near-optimal throughput while substantially
                  reducing TCP interruptions due to switching.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "121",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Kumar:2014:WCG,
  author =       "Pratyush Kumar and Lothar Thiele",
  title =        "Worst-case guarantees on a processor with
                  temperature-based feedback control of speed",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "122:1--122:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584611",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "On-chip temperatures continue to rise, in spite of
                  design efforts towards more efficient cooling and novel
                  low-power technologies. Run-time thermal management
                  techniques, such as speed scaling and system
                  throttling, constitute a standard component in today's
                  processors. One such technique is the feedback control
                  of the processing speed based on the on-chip
                  temperature. If suitably designed, such a controller
                  can ensure that the temperature of the processor does
                  not exceed a given bound, independent of the
                  application. Such isolation of needs is encouraging.
                  However, from the application's stand-point, such a
                  processor must provide performance guarantees; in
                  particular, the guarantee that real-time jobs do not
                  have worst-case delays larger than their relative
                  deadlines. For applications which exhibit variability,
                  such as bursty arrival patterns, computing such
                  guarantees is not apparent. As key enablers in such a
                  computation, for the specific setting of
                  First-Come-First-Serve (FCFS) scheduling, we (a) define
                  and prove a monotonicity principle satisfied by the
                  processor with the said controller, and (b) propose a
                  thermally clipped processor model. We identify the
                  worst-case trace simulating which on a suitably chosen
                  thermally clipped processor provides the tight
                  upper-bound on the worst-case delay. These results hold
                  for general models of (a) the power consumption of the
                  processor, (b) its thermal model, (c) the speed scaling
                  law, and (d) the task model. For this modelling scope,
                  we show that the same worst-case trace also leads to
                  the worst-case temperature of the processor. This is
                  useful to characterise tasks which do not load the
                  processor sufficiently to hit the given peak
                  temperature bound. We demonstrate the utility of this
                  calculation by designing a shaper to delay the arrival
                  times of jobs and thereby restrict the observed
                  worst-case temperature while still meeting the task's
                  deadlines.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "122",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Guan:2014:WAM,
  author =       "Nan Guan and Mingsong Lv and Wang Yi and Ge Yu",
  title =        "{WCET} analysis with {MRU} cache: Challenging {LRU}
                  for predictability",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "123:1--123:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584655",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most previous work on cache analysis for WCET
                  estimation assumes a particular replacement policy
                  called LRU. In contrast, much less work has been done
                  for non-LRU policies, since they are generally
                  considered to be very unpredictable. However, most
                  commercial processors are actually equipped with these
                  non-LRU policies, since they are more efficient in
                  terms of hardware cost, power consumption and thermal
                  output, while still maintaining almost as good
                  average-case performance as LRU. In this work, we study
                  the analysis of MRU, a non-LRU replacement policy
                  employed in mainstream processor architectures like
                  Intel Nehalem. Our work shows that the predictability
                  of MRU has been significantly underestimated before,
                  mainly because the existing cache analysis techniques
                  and metrics do not match MRU well. As our main
                  technical contribution, we propose a new cache hit/miss
                  classification, $k$-Miss, to better capture the MRU
                  behavior, and develop formal conditions and efficient
                  techniques to decide $k$-Miss memory accesses. A
                  remarkable feature of our analysis is that the $k$-Miss
                  classifications under MRU are derived by the analysis
                  result of the same program under LRU. Therefore, our
                  approach inherits the advantages in efficiency and
                  precision of the state-of-the-art LRU analysis
                  techniques based on abstract interpretation.
                  Experiments with instruction caches show that our
                  proposed MRU analysis has both good precision and high
                  efficiency, and the obtained estimated WCET is rather
                  close to (typically 1\% to 8\% more than) that obtained
                  by the state-of-the-art LRU analysis, which indicates
                  that MRU is also a good candidate for cache replacement
                  policies in real-time systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "123",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Chattopadhyay:2014:UWA,
  author =       "Sudipta Chattopadhyay and Lee Kee Chong and Abhik
                  Roychoudhury and Timon Kelter and Peter Marwedel and
                  Heiko Falk",
  title =        "A unified {WCET} analysis framework for multicore
                  platforms",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "124:1--124:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584654",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the advent of multicore architectures, worst-case
                  execution time (WCET) analysis has become an
                  increasingly difficult problem. In this article, we
                  propose a unified WCET analysis framework for multicore
                  processors featuring both shared cache and shared bus.
                  Compared to other previous works, our work differs by
                  modeling the interaction of shared cache and shared bus
                  with other basic microarchitectural components (e.g.,
                  pipeline and branch predictor). In addition, our
                  framework does not assume a timing anomaly free
                  multicore architecture for computing the WCET. A
                  detailed experiment methodology suggests that we can
                  obtain reasonably tight WCET estimates in a wide range
                  of benchmark programs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "124",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhu:2014:CCL,
  author =       "Xiuming Zhu and Pei-Chi Huang and Jianyong Meng and
                  Song Han and Aloysius K. Mok and Deji Chen and Mark
                  Nixon",
  title =        "{ColLoc}: a collaborative location and tracking system
                  on {WirelessHART}",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "125:1--125:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584656",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Localization in wireless sensor networks is an
                  important functionality that is required for tracking
                  personnel and assets in industrial environments,
                  especially for emergency response. Current commercial
                  localization systems such as GPS suffer from the
                  limitations of either high cost or low availability in
                  many situations (e.g., indoor environments that exclude
                  direct line-of-sight signal reception). The development
                  of industrial wireless sensor networks such as
                  WirelessHART provides an alternative. In this article,
                  we present the design and implementation of ColLoc: a
                  collaborative location and tracking system on
                  WirelessHART as an industrially viable solution. This
                  solution is built upon several technological advances.
                  First, ColLoc adds the roaming functionality to
                  WirelessHART and thus provides a means for keeping
                  mobile WirelessHART devices connected to the network.
                  Second, ColLoc employs a collaborative framework to
                  integrate different types of distance measurements into
                  the location estimation algorithm by weighing them
                  according to their precision levels. ColLoc adopts
                  several novel techniques to improve distance estimation
                  accuracy and decreases the RSSI presurvey cost. These
                  techniques include introducing distance error range
                  constraints to the measurements, judiciously selecting
                  the initial point in location estimation and online
                  updating the signal propagation models in the anchor
                  nodes, integrating Extended Kalman Filter (EKF) with
                  trilateration to track moving objects. Our
                  implementation of ColLoc can be applied to any
                  WirelessHART-conforming network because no modification
                  is needed on the WirelessHART field devices. We have
                  implemented a complete ColLoc system to validate both
                  the design and the effectiveness of our localization
                  algorithm. Our experiments show that the mobile device
                  never drops out of the WirelessHART network while
                  moving around; with the help of even one dependable
                  anchor, using RSSI can yield at least 75\% of distance
                  errors below 5 meters, which is quite acceptable for
                  many typical industrial automation applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "125",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2014:IEM,
  author =       "Huang-Ming Huang and Christopher Gill and Chenyang
                  Lu",
  title =        "Implementation and evaluation of mixed-criticality
                  scheduling approaches for sporadic tasks",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "126:1--126:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584612",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  note =         "See corrections and comments
                  \cite{Fleming:2017:CDI}.",
  abstract =     "Traditional fixed-priority scheduling analysis for
                  periodic and sporadic task sets is based on the
                  assumption that all tasks are equally critical to the
                  correct operation of the system. Therefore, every task
                  has to be schedulable under the chosen scheduling
                  policy, and estimates of tasks' worst-case execution
                  times must be conservative in case a task runs longer
                  than is usual. To address the significant
                  underutilization of a system's resources under normal
                  operating conditions that can arise from these
                  assumptions, several mixed-criticality scheduling
                  approaches have been proposed. However, to date, there
                  have been few quantitative comparisons of system
                  schedulability or runtime overhead for the different
                  approaches. In this article, we present a side-by-side
                  implementation and evaluation of the known
                  mixed-criticality scheduling approaches, for periodic
                  and sporadic mixed-criticality tasks on uniprocessor
                  systems, under a mixed-criticality scheduling model
                  that is common to all these approaches. To make a fair
                  evaluation of mixed-criticality scheduling, we also
                  address previously open issues and propose
                  modifications to improve particular approaches. Our
                  empirical evaluations demonstrate that user-space
                  implementations of mechanisms to enforce different
                  mixed-criticality scheduling approaches can be achieved
                  atop Linux without kernel modification, with reasonably
                  low (but in some cases nontrivial) overhead for
                  mixed-criticality real-time task sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "126",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Pajic:2014:SCM,
  author =       "Miroslav Pajic and Zhihao Jiang and Insup Lee and Oleg
                  Sokolsky and Rahul Mangharam",
  title =        "Safety-critical medical device development using the
                  {UPP2SF} model translation tool",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "127:1--127:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584651",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Software-based control of life-critical embedded
                  systems has become increasingly complex, and to a large
                  extent has come to determine the safety of the human
                  being. For example, implantable cardiac pacemakers have
                  over 80,000 lines of code which are responsible for
                  maintaining the heart within safe operating limits. As
                  firmware-related recalls accounted for over 41\% of the
                  600,000 devices recalled in the last decade, there is a
                  need for rigorous model-driven design tools to generate
                  verified code from verified software models. To this
                  effect, we have developed the UPP2SF model-translation
                  tool, which facilitates automatic conversion of
                  verified models (in UPPAAL) to models that may be
                  simulated and tested (in Simulink/Stateflow). We
                  describe the translation rules that ensure correct
                  model conversion, applicable to a large class of
                  models. We demonstrate how UPP2SF is used in the
                  model-driven design of a pacemaker whose model is (a)
                  designed and verified in UPPAAL (using timed automata),
                  (b) automatically translated to Stateflow for
                  simulation-based testing, and then (c) automatically
                  generated into modular code for hardware-level
                  integration testing of timing-related errors. In
                  addition, we show how UPP2SF may be used for worst-case
                  execution time estimation early in the design stage.
                  Using UPP2SF, we demonstrate the value of integrated
                  end-to-end modeling, verification, code-generation and
                  testing process for complex software-controlled
                  embedded systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "127",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Saifullah:2014:NOR,
  author =       "Abusayeed Saifullah and Chengjie Wu and Paras Babu
                  Tiwari and You Xu and Yong Fu and Chenyang Lu and Yixin
                  Chen",
  title =        "Near optimal rate selection for wireless control
                  systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "128:1--128:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584652",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the advent of industrial standards such as
                  WirelessHART, process industries are now gravitating
                  towards wireless control systems. Due to limited
                  bandwidth in a wireless network shared by multiple
                  control loops, it is critical to optimize the overall
                  control performance. In this article, we address the
                  scheduling-control co-design problem of determining the
                  optimal sampling rates of feedback control loops
                  sharing a WirelessHART network. The objective is to
                  minimize the overall control cost while ensuring that
                  all data flows meet their end-to-end deadlines. The
                  resulting constrained optimization based on existing
                  delay bounds for WirelessHART networks is challenging
                  since it is nondifferentiable, nonlinear, and not in
                  closed-form. We propose four methods to solve this
                  problem. First, we present a subgradient method for
                  rate selection. Second, we propose a greedy heuristic
                  that usually achieves low control cost while
                  significantly reducing the execution time. Third, we
                  propose a global constrained optimization algorithm
                  using a simulated annealing (SA) based penalty method.
                  We study SA method under both constant factor penalty
                  and adaptive penalty. Finally, we formulate rate
                  selection as a differentiable convex optimization
                  problem that provides a quick solution through a convex
                  optimization technique. This is based on a new delay
                  bound that is convex and differentiable, and hence
                  simplifies the optimization problem. We study both the
                  gradient descent method and the interior point method
                  to solve it. We evaluate all methods through
                  simulations based on topologies of a 74-node wireless
                  sensor network testbed. The subgradient method is
                  disposed to incur the longest execution time as well as
                  the highest control cost among all methods. Among the
                  SA-based constant penalty method, the greedy heuristic,
                  and the gradient descent method, the first two
                  represent the opposite ends of the tradeoff between
                  control cost and execution time, while the third one
                  hits the balance between the two. We further observe
                  that the SA-based adaptive penalty method is superior
                  to the constant penalty method, and that the interior
                  point method is superior to the gradient method. Thus,
                  the interior point method and the SA-based adaptive
                  penalty method are the two most effective approaches
                  for rate selection. While both methods are competitive
                  against each other in terms of control cost, the
                  interior point method is significantly faster than the
                  penalty method. As a result, the interior point method
                  upon convex relaxation is more suitable for online rate
                  adaptation than the SA-based adaptive penalty method
                  due to their significant difference in run-time
                  efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "128",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Hettiarachchi:2014:DAF,
author = "Pradeep M. Hettiarachchi and Nathan Fisher and Masud
Ahmed and Le Yi Wang and Shinan Wang and Weisong Shi",
title = "A Design and Analysis Framework for Thermal-Resilient
Hard Real-Time Systems",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "146:1--146:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632154",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We address the challenge of designing predictable
real-time systems in an unpredictable thermal
environment where environmental temperature may
dynamically change (e.g., implantable medical devices).
Towards this challenge, we propose a control-theoretic
design methodology that permits a system designer to
specify a set of hard real-time performance modes under
which the system may operate. The system automatically
adjusts the real-time performance mode based on the
external thermal stress. We show (via analysis,
simulations, and a hardware testbed implementation)
that our control design framework is stable and control
performance is equivalent to previous real-time thermal
approaches, even under dynamic temperature changes. A
crucial and novel advantage of our framework over
previous real-time control is the ability to guarantee
hard deadlines even under transitions between modes.
Furthermore, our system design permits the calculation
of a new metric called thermal resiliency that
characterizes the maximum external thermal stress that
any hard real-time performance mode can withstand.
Thus, our design framework and analysis may be
classified as a thermal stress analysis for real-time
systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "146",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chattopadhyay:2014:CRP,
author = "Sudipta Chattopadhyay and Abhik Roychoudhury",
title = "Cache-Related Preemption Delay Analysis for Multilevel
Noninclusive Caches",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "147:1--147:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632156",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With the rapid growth of complex hardware features,
timing analysis has become an increasingly difficult
problem. The key to solving this problem lies in the
precise and scalable modeling of performance-enhancing
processor features (e.g., cache). Moreover, real-time
systems are often multitasking and use preemptive
scheduling, with fixed or dynamic priority assignment.
For such systems, cache related preemption delay (CRPD)
may increase the execution time of a task. Therefore,
CRPD may affect the overall schedulability analysis.
Existing works propose to bound the value of CRPD in a
single-level cache. In this article, we propose a CRPD
analysis framework that can be used for a two-level,
noninclusive cache hierarchy. In addition, our proposed
framework is also applicable in the presence of shared
caches. We first show that CRPD analysis faces several
new challenges in the presence of a multilevel,
noninclusive cache hierarchy. Our proposed framework
overcomes all such challenges and we can formally prove
the correctness of our framework. We have performed
experiments with several subject programs, including an
unmanned aerial vehicle (UAV) controller and an in-situ
space debris monitoring instrument. Our experimental
results suggest that we can provide sound and precise
CRPD estimates using our framework.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "147",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Paul:2014:RTP,
author = "Anand Paul",
title = "Real-Time Power Management for Embedded {M2M} Using
Intelligent Learning Methods",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "148:1--148:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632158",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this work, an embedded system working model is
designed with one server that receives requests by a
requester by a service queue that is monitored by a
Power Manager (PM). A novel approach is presented based
on reinforcement learning to predict the best policy
amidst existing DPM policies and deterministic
Markovian nonstationary policies (DMNSP). We apply
reinforcement learning, namely a computational approach
to understanding and automating goal-directed learning
that supports different devices according to their DPM.
Reinforcement learning uses a formal framework defining
the interaction between agent and environment in terms
of states, response action, and reward points. The
capability of this approach is demonstrated by an
event-driven simulator designed using Java with a
power-manageable machine-to-machine device. Our
experiment result shows that the proposed dynamic power
management with timeout policy gives average power
saving from 4\% to 21\% and the novel dynamic power
management with DMNSP gives average power saving from
10\% to 28\% more than already proposed DPM policies.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "148",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zeng:2014:MSC,
author = "Haibo Zeng and Marco {Di Natale} and Qi Zhu",
title = "Minimizing Stack and Communication Memory Usage in
Real-Time Embedded Applications",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "149:1--149:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632160",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In the development of real-time embedded applications,
especially those on systems-on-chip, an efficient use
of RAM memory is as important as the effective
scheduling of the computation resources. The protection
of communication and state variables accessed by
concurrent tasks must provide real-time schedulability
guarantees while using the least amount of memory.
Several schemes, including preemption thresholds, have
been developed to improve schedulability and save stack
space by selectively disabling preemption. However, the
design synthesis problem is still open. In this
article, we target the assignment of the scheduling
parameters to minimize memory usage for systems of
practical interest, including designs compliant with
automotive standards. We propose algorithms either
proven optimal or shown to improve on randomized
optimization methods like simulated annealing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "149",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chakraborty:2014:MCH,
author = "Arup Chakraborty and Houman Homayoun and Amin Khajeh
and Nikil Dutt and Ahmed Eltawil and Fadi Kurdahi",
title = "Multicopy Cache: a Highly Energy-Efficient Cache
Architecture",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "150:1--150:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632162",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Caches are known to consume a large part of total
microprocessor energy. Traditionally, voltage scaling
has been used to reduce both dynamic and leakage power
in caches. However, aggressive voltage reduction causes
process-variation-induced failures in cache SRAM
arrays, thus compromising cache reliability. We present
MultiCopy Cache (MC$^2$), a new cache architecture that
achieves significant reduction in energy consumption
through aggressive voltage scaling while maintaining
high error resilience (reliability) by exploiting
multiple copies of each data item in the cache. Unlike
many previous approaches, MC$^2$ does not require any
error map characterization and therefore is responsive
to changing operating conditions (e.g., Vdd noise,
temperature, and leakage) of the cache. MC$^2$ also
incurs significantly lower overheads compared to other
ECC-based caches. Our experimental results on embedded
benchmarks demonstrate that MC$^2$ achieves up to 60\%
reduction in energy and energy-delay product (EDP) with
only 3.5\% reduction in IPC and no appreciable area
overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "150",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hanumaiah:2014:SST,
author = "Vinay Hanumaiah and Digant Desai and Benjamin Gaudette
and Carole-Jean Wu and Sarma Vrudhula",
title = "{STEAM}: a Smart Temperature and Energy Aware
Multicore Controller",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "151:1--151:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2661430",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recent empirical studies have shown that multicore
scaling is fast becoming power limited, and
consequently, an increasing fraction of a multicore
processor has to be under clocked or powered off.
Therefore, in addition to fundamental innovations in
architecture, compilers and parallelization of
application programs, there is a need to develop
practical and effective dynamic energy management (DEM)
techniques for multicore processors. Existing DEM
techniques mainly target reducing processor power
consumption and temperature, and only few of them have
addressed improving energy efficiency for multicore
systems. With energy efficiency taking a center stage
in all aspects of computing, the focus of the DEM needs
to be on finding practical methods to maximize
processor efficiency. Towards this, this article
presents STEAM --- an optimal closed-loop DEM
controller designed for multicore processors. The
objective is to maximize energy efficiency by dynamic
voltage and frequency scaling (DVFS). Energy efficiency
is defined as the ratio of performance to power
consumption or performance-per-watt (PPW). This is the
same as the number of instructions executed per Joule.
The PPW metric is actually replaced by $ P^\alpha $ PW
(performance$^\alpha $-per-Watt), which allows for
controlling the importance of performance versus power
consumption by varying $ \alpha $. The proposed
controller was implemented on a Linux system and tested
with the Intel Sandy Bridge processor. There are three
power management schemes called governors, available
with Intel platforms. They are referred to as (1)
Powersave (lowest power consumption), (2) Performance
(achieves highest performance), and (3) Ondemand. Our
simple and lightweight controller when executing SPEC
CPU2006, PARSEC, and MiBench benchmarks have achieved
an average of 18\% improvement in energy efficiency
(MIPS/Watt) over these ACPI policies. Moreover, STEAM
also demonstrated an excellent prediction of core
temperatures and power consumption, and the ability to
control the core temperatures within $ 3^\circ $C of
the specified maximum. Finally, the overhead of the
STEAM implementation (in terms of CPU resources) is
less than 0.25\%. The entire implementation is
self-contained and can be installed on any processor
with very little prior knowledge of the processor.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "151",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rossebo:2014:ISI,
author = "Judith E. Y. Rosseb{\o} and Siv Hilde Houmb and Geri
Georg and Virginia N. L. Franqueira and Dimitrios
Serpanos",
title = "Introduction to Special Issue on Risk and Trust in
Embedded Critical Systems",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "152:1--152:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2659008",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "152",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dunbar:2014:DTE,
author = "Carson Dunbar and Gang Qu",
title = "Designing Trusted Embedded Systems from Finite State
Machines",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "153:1--153:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2638555",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Sequential components are crucial for a real-time
embedded system as they control the system based on the
system's current state and real life input. In this
article, we explore the security and trust issues of
sequential system design from the perspective of a
finite state machine (FSM), which is the most popular
model used to describe sequential systems.
Specifically, we find that the traditional FSM
synthesis procedure will introduce security risks and
cannot guarantee trustworthiness in the implemented
circuits. Indeed, we show that not only do there exist
simple and effective ways to attack a sequential
system, it is also possible to insert a hardware Trojan
Horse into the design without introducing any
significant design overhead. We then formally define
the notion of trust in FSM and propose a novel approach
to designing trusted circuits from the FSM
specification. We demonstrate both our findings on the
security threats and the effectiveness of our proposed
method on Microelectronics Center of North Carolina
(MCNC) sequential circuit benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "153",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dua:2014:CSS,
author = "Akshay Dua and Nirupama Bulusu and Wu-Chang Feng and
Wen Hu",
title = "Combating Software and {Sybil} Attacks to Data
Integrity in Crowd-Sourced Embedded Systems",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "154:1--154:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629338",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Crowd-sourced mobile embedded systems allow people to
contribute sensor data, for critical applications,
including transportation, emergency response and
eHealth. Data integrity becomes imperative as malicious
participants can launch software and Sybil attacks
modifying the sensing platform and data. To address
these attacks, we develop (1) a Trusted Sensing
Peripheral (TSP) enabling collection of high-integrity
raw or aggregated data, and participation in
applications requiring additional modalities; and (2) a
Secure Tasking and Aggregation Protocol (STAP) enabling
aggregation of TSP trusted readings by untrusted
intermediaries, while efficiently detecting
fabricators. Evaluations demonstrate that TSP and STAP
are practical and energy-efficient.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "154",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2014:ISI,
author = "Li-Pin Chang and Tei-Wei Kuo and Chris Gill and Jin
Nakazawa",
title = "Introduction to the Special Issue on Real-Time,
Embedded and Cyber-Physical Systems",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "155:1--155:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2660488",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "155",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Qiu:2014:BPD,
author = "Keni Qiu and Mengying Zhao and Chun Jason Xue and Alex
Orailoglu",
title = "Branch Prediction-Directed Dynamic Instruction Cache
Locking for Embedded Systems",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "156:1--156:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2660492",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cache locking is a cache management technique to
preclude the replacement of locked cache contents.
Cache locking is often adopted to improve cache access
predictability in Worst-Case Execution Time (WCET)
analysis. Static cache locking methods have been
proposed recently to improve Average-Case Execution
Time (ACET) performance. This article presents an
approach, Branch Prediction-directed Dynamic Cache
Locking (BPDCL), to improve system performance through
cache conflict miss reduction. In the proposed
approach, the control flow graph of a program is first
partitioned into disjoint execution regions, then
memory blocks worth locking are determined by
calculating the locking profit for each region. These
two steps are conducted during compilation time. At
runtime, directed by branch predictions, locking
routines are prefetched into a small high-speed buffer.
The predetermined cache locking contents are loaded and
locked at specific execution points during program
execution. Experimental results show that the proposed
BPDCL method exhibits an average improvement of 25.9\%,
13.8\%, and 8.0\% on cache miss rate reduction in
comparison to cases with no cache locking, the static
locking method, and the dynamic locking method,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "156",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kang:2014:HSA,
author = "Chih-Kai Kang and Yu-Jhang Cai and Chin-Hsien Wu and
Pi-Cheng Hsiu",
title = "A Hybrid Storage Access Framework for High-Performance
Virtual Machines",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "157:1--157:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2660493",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "In recent years, advances in virtualization technology
have enabled multiple virtual machines to run on a
physical machine, such that each virtual machine can
perform independently with its own operating system.
The IT industry has adopted virtualization technology
because of its ability to improve hardware resource
utilization, achieve low-power consumption, support
concurrent applications, simplify device management,
and reduce maintenance costs. However, because of the
hardware limitation of storage devices, the I/O
capacity could cause performance bottlenecks. To
address the problem, we propose a hybrid storage access
framework that exploits solid-state drives (SSDs) to
improve the I/O performance in a virtualization
environment.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "157",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pagani:2014:EEA,
author = "Santiago Pagani and Jian-Jia Chen",
title = "Energy Efficiency Analysis for the Single Frequency
Approximation {(SFA)} Scheme",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "158:1--158:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2660490",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Energy-efficient designs are important issues in
computing systems. This article studies the energy
efficiency of a simple and linear-time strategy, called
the Single Frequency Approximation (SFA) scheme, for
periodic real-time tasks on multicore systems with a
shared supply voltage in a voltage island. The strategy
executes all the cores at a single frequency to just
meet the timing constraints. SFA has been adopted in
the literature after task partitioning, but the
worst-case performance of SFA in terms of energy
consumption incurred is an open problem. We provide
comprehensive analysis for SFA to derive the cycle
utilization distribution for its worst-case behaviour
for energy minimization. Our analysis shows that the
energy consumption incurred by using SFA for task
execution is at most 1.53 (1.74, 2.10, 2.69,
respectively), compared to the energy consumption of
the optimal voltage/frequency scaling, when the dynamic
power consumption is a cubic function of the frequency
and the voltage island has up to 4 (8, 16, 32,
respectively) cores. The analysis shows that SFA is
indeed an effective scheme under practical settings,
even though it is not optimal. Furthermore, since all
the cores run at a single frequency and no frequency
alignment for Dynamic Voltage and Frequency Scaling
(DVFS) between cores is needed, any unicore dynamic
power management technique for reducing the energy
consumption for idling can be easily incorporated
individually on each core in the voltage island. This
article also provides an analysis of energy consumption
for SFA combined with procrastination for Dynamic Power
Management (DPM), resulting in an increment of 1 from
the previous results for task execution. Furthermore,
we also extend our analysis for deriving the
approximation factor of SFA for a multicore system with
multiple voltage islands.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "158",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Raravi:2014:TAA,
author = "Gurulingesh Raravi and Vincent N{\'e}lis",
title = "Task Assignment Algorithms for Heterogeneous
Multiprocessors",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "159:1--159:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2660494",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Consider the problem of assigning implicit-deadline
sporadic tasks on a heterogeneous multiprocessor
platform comprising a constant number (denoted by $t$)
of distinct types of processors --- such a platform is
referred to as a $t$-type platform. We present two
algorithms, LPG$_{IM}$ and LPG$_{NM}$, each providing
the following guarantee. For a given $t$-type platform
and a task set, if there exists a task assignment such
that tasks can be scheduled to meet their deadlines by
allowing them to migrate only between processors of the
same type (intra-migrative), then: (i) LPG$_{IM}$
succeeds in finding such an assignment where the same
restriction on task migration applies (intra-migrative)
but given a platform in which only one processor of
each type is $ 1 + \alpha \times (t - 1) / t $ times
faster and (ii) LPG$_{NM}$ succeeds in finding a task
assignment where tasks are not allowed to migrate
between processors (non-migrative) but given a platform
in which every processor is $ 1 + \alpha $ times
faster. The parameter $ \alpha $ is a property of the
task set; it is the maximum of all the task
utilizations that are no greater than one. To the best
of our knowledge, for $t$-type heterogeneous
multiprocessors: (i) for the problem of intra-migrative
task assignment, no previous algorithm exists with a
proven bound and hence our algorithm, LPG$_{IM}$, is
the first of its kind and (ii) for the problem of
non-migrative task assignment, our algorithm,
LPG$_{NM}$, has superior performance compared to
state-of-the-art.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "159",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Andersson:2014:PGT,
author = "Bj{\"o}rn Andersson and Gurulingesh Raravi",
title = "Provably Good Task Assignment for Two-Type
Heterogeneous Multiprocessors Using Cutting Planes",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "160:1--160:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2660495",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Consider scheduling of real-time tasks on a
multiprocessor where migration is forbidden.
Specifically, consider the problem of determining a
task-to-processor assignment for a given collection of
implicit-deadline sporadic tasks upon a multiprocessor
platform in which there are two distinct types of
processors. For this problem, we propose a new
algorithm, LPC (task assignment based on solving a
Linear Program with Cutting planes). The algorithm
offers the following guarantee: for a given task set
and a platform, if there exists a feasible
task-to-processor assignment, then LPC succeeds in
finding such a feasible task-to-processor assignment as
well but on a platform in which each processor is $ 1.5
\times $ faster and has three additional processors.
For systems with a large number of processors, LPC has
a better approximation ratio than state-of-the-art
algorithms. To the best of our knowledge, this is the
first work that develops a provably good real-time task
assignment algorithm using cutting planes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "160",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mancuso:2014:OPA,
author = "Giulio M. Mancuso and Enrico Bini and Gabriele
Pannocchia",
title = "Optimal Priority Assignment to Control Tasks",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "161:1--161:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2660496",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In embedded real-time systems, task priorities are
often assigned to meet deadlines. However, in control
tasks, a late completion of a task has no catastrophic
consequence; rather, it has a quantifiable impact in
the control performance achieved by the task. In this
article, we address the problem of determining the
optimal assignment of priorities and periods of
sampled-data control tasks that run over a shared
computation unit. We show that the minimization of the
overall cost can be performed efficiently using a
branch and bound algorithm that can be further speeded
up by allowing for a small degree of suboptimality.
Detailed numerical simulations are presented to show
the advantages of various branching alternatives, the
overall algorithm effectiveness, and its scalability
with the number of tasks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "161",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{DeNiz:2014:UBR,
author = "Dionisio {De Niz} and Lutz Wrage and Anthony Rowe and
Ragunathan (Raj) Rajkumar",
title = "Utility-Based Resource Overbooking for Cyber-Physical
Systems",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "162:1--162:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2660497",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Traditional hard real-time scheduling algorithms
require the use of the worst-case execution times to
guarantee that deadlines will be met. Unfortunately,
many algorithms with parameters derived from sensing
the physical world suffer large variations in execution
time, leading to pessimistic overall utilization, such
as visual recognition tasks. In this article, we
present ZS-QRAM, a scheduling approach that enables the
use of flexible execution times and application-derived
utility to tasks in order to maximize total system
utility. In particular, we provide a detailed
description of the algorithm, the formal proofs for its
temporal protection, and a detailed evaluation. Our
evaluation uses the Utility Degradation Resilience
(UDR) showing that ZS-QRAM is able to obtain $ 4 \times
$ as much UDR as ZSRM, a previous overbooking approach,
and almost $ 2 \times $ as much UDR as Rate-Monotonic
with Period Transformation (RM/TP). We then evaluate a
Linux kernel module implementation of our scheduler on
an Unmanned Air Vehicle (UAV) platform. We show that,
by using our approach, we are able to keep the tasks
that render the most utility by degrading lower-utility
ones even in the presence of highly dynamic execution
times.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "162",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2014:STD,
author = "Kai Liu and Victor C. S. Lee and Joseph K. Y. Ng and
Sang H. Son and Edwin H.-M. Sha",
title = "Scheduling Temporal Data with Dynamic Snapshot
Consistency Requirement in Vehicular Cyber-Physical
Systems",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "163:1--163:??",
month = sep,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629546",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 6 16:07:59 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Timely and efficient data dissemination is one of the
fundamental requirements to enable innovative
applications in vehicular cyber-physical systems
(VCPS). In this work, we intensively analyze the
characteristics of temporal data dissemination in VCPS.
On this basis, we formulate the static and dynamic
snapshot consistency requirements on serving real-time
requests for temporal data items. Two online algorithms
are proposed to enhance the system performance with
different requirements. In particular, a reschedule
mechanism is developed to make the scheduling adaptable
to the dynamic snapshot consistency requirement. A
comprehensive performance evaluation demonstrates the
superiority of the proposed algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "163",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Goehringer:2014:ISI,
author = "Diana Goehringer",
title = "Introduction to the {Special Issue on Virtual
Prototyping of Parallel and Embedded Systems (ViPES)}",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "164:1--164:??",
month = nov,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2675739",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jan 7 15:03:31 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "164",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schumacher:2014:LLS,
author = "Christoph Schumacher and Jan Henrik Weinstock and
Rainer Leupers and Gerd Ascheid and Laura Tosoratto and
Alessandro Lonardo and Dietmar Petras and Andreas
Hoffmann",
title = "{legaSCi}: Legacy {SystemC} Model Integration into
Parallel Simulators",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "165:1--165:??",
month = nov,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2678018",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jan 7 15:03:31 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Architects and developers use virtual prototypes of
computer systems to receive early feedback on hardware
design decisions as well as to develop and debug system
software. This is facilitated by the comprehensive
inspection capabilities virtual prototypes offer. For
virtual prototypes, execution speed is crucial to
support the users' productivity. Parallel simulation
techniques are employed to offset the speed impact of
the increasing number of cores that need to be
simulated in virtual prototypes of parallel and
embedded systems. SystemC is the de facto industry
standard library for virtual platform modeling. Since
currently no parallel SystemC library is commonly
available, typical SystemC models are coded for
execution in sequential simulation environments. Simply
putting such models into parallel simulators may lead
to thread-safety issues and may additionally cause
nondeterministic simulator behavior. This article
proposes a methodology to support simulation creators
to face the challenge of integrating such legacy models
into parallel SystemC environments. The feasibility of
the proposed method is evaluated by parallelizing the
latest instance of the EU FP7 project EURETILE embedded
platform simulator. Using legaSCi, on four host
processor cores a speedup of 2.13$ \times $ is
demonstrated, without having to change the individual
models of the simulator.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "165",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Razaghi:2014:HCM,
author = "Parisa Razaghi and Andreas Gerstlauer",
title = "Host-Compiled Multicore System Simulation for Early
Real-Time Performance Evaluation",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "166:1--166:??",
month = nov,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2678020",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jan 7 15:03:31 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With increasing complexity and software content,
modern embedded platforms employ a heterogeneous mix of
multicore processors along with hardware accelerators
in order to provide high performance in limited power
budgets. To evaluate real-time performance and other
constraints, full system simulations are essential.
With traditional approaches being either slow or
inaccurate, so-called source-level or host-compiled
simulators have recently emerged as a solution for
rapid evaluation of the complete system at early design
stages. In such approaches, a faster simulation is
achieved by abstracting execution behavior and
increasing simulation granularity. However, existing
source-level simulators often focus on application
behavior only while neglecting the effects of
hardware/software interactions and their associated
speed and accuracy trade-offs. In this article, we
present a host-compiled simulator that emulates
software execution in a full-system context. Our
simulator incorporates abstract models of both
real-time operating systems (RTOSs) and multicore
processors to replicate timing-accurate
hardware/software interactions and to enable full
system cosimulation. An integrated approach for
automatic timing granularity adjustment (ATGA) uses
observations of the system state to automatically
control the timing model and optimally navigate speed
versus accuracy conditions. Results as applied to
industrial-strength platforms confirm that OS- and
system-level effects can significantly contribute to
overall accuracy and simulation overhead. By providing
careful abstractions, our models can achieve full
system simulations at equivalent speeds of more than a
thousand MIPS with less than 3\% timing error. Coupled
with the capability to easily adjust simulation
parameters and configurations, this demonstrates the
benefits of our simulator for early application
development and design space exploration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "166",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mihajlovic:2014:DIQ,
author = "Bojan Mihajlovi{\'c} and Zeljko Zili{\'c} and Warren
J. Gross",
title = "Dynamically Instrumenting the {QEMU} Emulator for
{Linux} Process Trace Generation with the {GDB}
Debugger",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "167:1--167:??",
month = nov,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2678022",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jan 7 15:03:31 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/gnu.bib;
https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "In software debugging, trace generation techniques are
used to resolve highly complex bugs. However, the
emulators increasingly used for embedded software
development do not yet offer the types of trace
generation infrastructure available in hardware. In
this article, we make changes to the ARM ISA emulation
of the QEMU emulator to allow for continuous
instruction-level trace generation. Using a standard
GDB client, tracepoints can be inserted to dynamically
log registers and memory addresses without altering
executing code. The ability to run trace experiments in
five different modes allows the scope of trace
generation to be narrowed as needed, down to the level
of a single Linux process. Our scheme collects the
execution traces of a Linux process on average between
9.6x--0.7x the speed of existing QEMU trace
capabilities, with 96.7\% less trace data volume.
Compared to a software-instrumented tracing scheme, our
method is both unobtrusive and performs on average
between 3--4 orders of magnitude faster.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "167",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Diamantopoulos:2014:PFS,
author = "Dionysios Diamantopoulos and Efstathios
Sotiriou-Xanthopoulos and Kostas Siozios and George
Economakos and Dimitrios Soudris",
title = "{Plug\&Chip}: a Framework for Supporting Rapid
Prototyping of {$3$D} Hybrid Virtual {SoCs}",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "168:1--168:??",
month = nov,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2661634",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jan 7 15:03:31 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In the embedded system domain there is a continuous
demand towards providing higher flexibility for
application development. This trend strives for virtual
prototyping solutions capable of performing fast system
simulation. Among other benefits, such a solution
supports concurrent hardware/software system design by
enabling to start developing, testing, and validating
the embedded software substantially earlier than has
been possible in the past. Towards this direction,
throughout this article we introduce a new framework,
named Plug\&Chip, targeting to support rapid
prototyping of 2D and 3D digital systems. In contrast
to other relevant approaches, our solution provides
higher flexibility by enabling incremental system
design, while also handling platforms developed with
the usage of 3D integration technology.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "168",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Siozios:2014:FSA,
author = "Kostas Siozios and Dimitrios Soudris and Michael
H{\"u}bner",
title = "A Framework for Supporting Adaptive Fault-Tolerant
Solutions",
journal = j-TECS,
volume = "13",
number = "5s",
pages = "169:1--169:??",
month = nov,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629473",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jan 7 15:03:31 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "For decades, computer architects pursued one primary
goal: performance. The ever-faster transistors provided
by Moore's law were translated into remarkable gains in
operation frequency and power consumption. However, the
device-level size and architecture complexity impose
several new challenges, including a decrease in
dependability level due to physical failures. In this
article we propose a software-supported methodology
based on game theory for adapting the aggressiveness of
fault tolerance at runtime. Experimental results prove
the efficiency of our solution since it achieves
comparable fault masking to relevant solutions, but
with significantly lower mitigation cost. More
specifically, our framework speeds up the
identification of suspicious failure resources on
average by 76\% as compared to the HotSpot tool.
Similarly, the introduced solution leads to average
Power$ \times $Delay (PDP) savings against an existing
TMR approach by 53\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "169",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2015:ERS,
author = "Sandeep K. Shukla",
title = "Editorial: Regular, Special, and Related Issues",
journal = j-TECS,
volume = "14",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2698230",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bishnoi:2015:BCC,
author = "Rimpy Bishnoi and Vijay Laxmi and Manoj Singh Gaur and
Jos{\'e} Flich and Francisco Trivi{\~n}o",
title = "A Brief Comment on {``A Complete Self-Testing and
Self-Configuring NoC Infrastructure for Cost-Effective
MPSoCs'' [ACM Transactions on Embedded Computing
Systems {\bf 12} (2013) Article 106]}",
journal = j-TECS,
volume = "14",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2668121",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
note = "See \cite{Ghiribaldi:2013:CST}.",
abstract = "In the Ghiribaldi et al. [2013] paper, a complete
self-testing and self-configuring NoC infrastructure
for cost-effective MPSoCs was presented in order to
make NoC architecture tolerant to faults. To overcome
the complexity involved during the complete
reconfiguration of routing instances in the face of
most of the usual failure patterns, Ghiribaldi et al.
[2013] proposed a fast self-reconfiguration algorithm.
The algorithm is based on segment-based routing
implemented using Logic-Based Distributed Routing
(LBDR) and claimed to have handled the most common NoC
faults. The purpose of this comment is to demonstrate
the inconsistency of the fast self-configuration method
presented in Ghiribaldi et al. [2013]. To handle
inconsistency, we present the correct set of LBDR bits
and also argue that complete reconfiguration of the
routing instance is mandatory to handle some fault
combinations. New coverage results of the fast
self-reconfiguration algorithm of Ghiribaldi et al.
[2013] are also presented.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Munir:2015:MAF,
author = "Arslan Munir and Joseph Antoon and Ann Gordon-Ross",
title = "Modeling and Analysis of Fault Detection and Fault
Tolerance in Wireless Sensor Networks",
journal = j-TECS,
volume = "14",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2680538",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Technological advancements in communications and
embedded systems have led to the proliferation of
Wireless Sensor Networks (WSNs) in a wide variety of
application domains. These application domains include
but are not limited to mission-critical (e.g.,
security, defense, space, satellite) or safety-related
(e.g., health care, active volcano monitoring) systems.
One commonality across all WSN application domains is
the need to meet application requirements (e.g.,
lifetime, reliability). Many application domains
require that sensor nodes be deployed in harsh
environments, such as on the ocean floor or in an
active volcano, making these nodes more prone to
failures. Sensor node failures can be catastrophic for
critical or safety-related systems. This article models
and analyzes fault detection and fault tolerance in
WSNs. To determine the effectiveness and accuracy of
fault detection algorithms, we simulate these
algorithms using ns-2. We investigate the synergy
between fault detection and fault tolerance and use the
fault detection algorithms' accuracies in our modeling
of Fault-Tolerant (FT) WSNs. We develop Markov models
for characterizing WSN reliability and Mean Time to
Failure (MTTF) to facilitate WSN application-specific
design. Results obtained from our FT modeling reveal
that an FT WSN composed of duplex sensor nodes can
result in as high as a 100\% MTTF increase and
approximately a 350\% improvement in reliability over a
Non-Fault-Tolerant (NFT) WSN. The article also
highlights future research directions for the design
and deployment of reliable and trustworthy WSNs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sarkar:2015:STP,
author = "Abhik Sarkar and Frank Mueller and Harini Ramaprasad",
title = "Static Task Partitioning for Locked Caches in
Multicore Real-Time Systems",
journal = j-TECS,
volume = "14",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2638557",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Growing processing demand on multitasking real-time
systems can be met by employing scalable multicore
architectures. For such environments, locking cache
lines for hard real-time systems ensures timing
predictability of data references and may lower
worst-case execution time. This work studies the
benefits of cache locking on massive multicore
architectures with private caches in the context of
hard real-time systems. In shared cache architectures,
the cache is a single resource shared among all of the
tasks. However, in scalable cache architectures with
private caches, conflicts exist only among the tasks
scheduled on one core. This calls for a cache-aware
allocation of tasks onto cores. The objective of this
work is to increase the predictability of memory
accesses resolved by caches while reducing the number
of cores for a given task set. This allows designers to
reduce the footprint of their subsystem of real-time
tasks and thereby cost, either by choosing a product
with fewer cores as a target or to allow more
subsystems to be co-located on a given fixed number of
cores. Our work proposes a novel variant of the
cache-unaware First Fit Decreasing (FFD) algorithm
called Naive locked First Fit Decreasing (NFFD) policy.
We propose two cache-aware static scheduling schemes:
(a) Greedy First Fit Decreasing (GFFD) and (b) Colored
First Fit Decreasing (CoFFD) for task sets where tasks
do not have intratask conflicts among locked regions
(Scenario A). NFFD is capable of scheduling high
utilization task sets that FFD cannot schedule.
Experiments also show that CoFFD consistently
outperforms GFFD, resulting in a lower number of cores
and lower system utilization. CoFFD reduces the number
of core requirements by 30\% to 60\% compared to NFFD.
For a more generic case where tasks have intratask
conflicts, we split the task partitioning between two
phases: task selection and task allocation (Scenario
B). Instead of resolving conflicts at a global level,
these algorithms resolve conflicts among regions while
allocating a task onto a core and unlocking at region
level instead of task level. We show that a combination
of dynamic ordering (task selection) with Chaitin's
Coloring (task allocation) scheme reduces the number of
cores required by up to 22\% over a basic scheme (in a
combination of monotone ordering and regional FFD).
Regional unlocking allows this scheme to outperform
CoFFD for medium utilization task sets from Scenario A.
However, CoFFD performs better than any other scheme
for high utilization task sets from Scenario A.
Overall, this work is unique in considering the
challenges of future multicore architectures for
real-time systems and provides key insights into task
partitioning and cache-locking mechanisms for
architectures with private caches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tillenius:2015:RAT,
author = "Martin Tillenius and Elisabeth Larsson and Rosa M.
Badia and Xavier Martorell",
title = "Resource-Aware Task Scheduling",
journal = j-TECS,
volume = "14",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2638554",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Dependency-aware task-based parallel programming
models have proven to be successful for developing
efficient application software for multicore-based
computer architectures. The programming model is
amenable to programmers, thereby supporting
productivity, whereas hardware performance is achieved
through a runtime system that dynamically schedules
tasks onto cores in such a way that all dependencies
are respected. However, even if the scheduling is
completely successful with respect to load balancing,
the scaling with the number of cores may be suboptimal
due to resource contention. Here we consider the
problem of scheduling tasks not only with respect to
their interdependencies but also with respect to their
usage of resources, such as memory and bandwidth. At
the software level, this is achieved by user
annotations of the task resource consumption. In the
runtime system, the annotations are translated into
scheduling constraints. Experimental results for
different hardware, demonstrating performance gains
both for model examples and real applications, are
presented. Furthermore, we provide a set of tools to
detect resource sensitivity and predict the performance
improvements that can be achieved by resource-aware
scheduling. These tools are solely based on parallel
execution traces and require no instrumentation or
modification of the application code.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2015:JWU,
author = "Yazhi Huang and Mengying Zhao and Chun Jason Xue",
title = "Joint {WCET} and Update Activity Minimization for
Cyber-Physical Systems",
journal = j-TECS,
volume = "14",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2680539",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A cyber-physical system (CPS) is a desirable computing
platform for many industrial and scientific
applications, such as industrial process monitoring,
environmental monitoring, chemical processes, and
battlefield surveillance. The application of CPSs has
two challenges: First, CPSs often include a number of
sensor nodes. Update of preloaded code on remote sensor
nodes powered by batteries is extremely energy
consuming. The code update issue in the
energy-sensitive CPS must be carefully considered.
Second, CPSs are often real-time embedded systems with
real-time properties. Worst-case execution time (WCET)
is one of the most important metrics in real-time
system design. Whereas existing works only consider one
of these two challenges at a time, in this article, a
compiler optimization---joint WCET and update-conscious
compilation, or WUCC---is proposed to jointly consider
WCET and code update for CPSs. The novelty of the
proposed approach is that the WCET problem and code
update problem are considered concurrently such that a
balanced solution with minimal WCET and minimal code
difference can be achieved. The experimental results
show that the proposed technique can minimize WCET and
code difference effectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bertozzi:2015:PRA,
author = "Davide Bertozzi and Stefano {Di Carlo} and Salvatore
Galfano and Marco Indaco and Piero Olivo and Paolo
Prinetto and Cristian Zambelli",
title = "Performance and Reliability Analysis of Cross-Layer
Optimizations of {NAND} Flash Controllers",
journal = j-TECS,
volume = "14",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629562",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "NAND flash memories are becoming the predominant
technology in the implementation of mass storage
systems for both embedded and high-performance
applications. However, when considering data and code
storage in Non-Volatile Memories (NVMs), such as NAND
flash memories, reliability and performance become a
serious concern for systems designers. Designing NAND
flash-based systems based on worst-case scenarios leads
to waste of resources in terms of performance, power
consumption, and storage capacity. This is clearly in
contrast with the request for runtime
reconfigurability, adaptivity, and resource
optimization in modern computing systems. There is a
clear trend toward supporting differentiated access
modes in flash memory controllers, each one setting a
differentiated tradeoff point in the
performance-reliability optimization space. This is
supported by the possibility of tuning the NAND flash
memory performance, reliability, and power consumption
through several tuning knobs such as the flash
programming algorithm and the flash error correcting
code. However, to successfully exploit these degrees of
freedom, it is mandatory to clearly understand the
effect that the combined tuning of these parameters has
on the full NVM subsystem. This article performs a
comprehensive quantitative analysis of the benefits
provided by the runtime reconfigurability of an MLC
NAND flash controller through the combined effect of an
adaptable memory programming circuitry coupled with
runtime adaptation of the ECC correction capability.
The full NVM subsystem is taken into account, starting
from a characterization of the low-level circuitry to
the effect of the adaptation on a wide set of realistic
benchmarks in order to provide readers a clear view of
the benefit this combined adaptation may provide at the
system level.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lin:2015:SLP,
author = "Ye-Jyun Lin and Chia-Lin Yang and Jiao-We Huang and
Tay-Jyi Lin and Chih-Wen Hsueh and Naehyuck Chang",
title = "System-Level Performance and Power Optimization for
{MPSoC}: a Memory Access-Aware Approach",
journal = j-TECS,
volume = "14",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2656339",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "As the number of IPs in a multimedia Multi-Processor
System-on-Chip (MPSoC) continues to increase,
concurrent memory accesses from different IPs
increasingly stress memory systems, which presents both
opportunities and challenges for future MPSoC design.
The impact of such requirements on the system-level
design for MPSoC is twofold. First, contention among
IPs prolongs memory access time, which exacerbates the
persisting memory wall problem. Second, longer memory
accesses lead to longer IP stall time, which results in
unnecessary leakage waste. In this article, we propose
two memory access-aware system-level design approaches
for performance and leakage optimization. To alleviate
the memory wall problem, we propose a Hierarchical
Memory Scheduling (HMS) policy that schedules memory
requests from the same IP and application consecutively
to reduce interference among memory accesses from
different IPs with a fairness guarantee. To reduce IP
leakage waste due to long memory access, we propose a
memory access-aware power-gating policy. A
straightforward power-gating approach is to power gate
an IP when it needs to fetch data from memory. However,
due to the response time variation among memory
accesses, aggressively power gating an IP whenever a
memory request occurs may result in incorrect
power-gating decisions. The proposed memory
access-aware power-gating policy makes these decisions
judiciously, based on the predicted memory latency of
an individual IP and its energy breakeven time. The
experimental results show that the proposed HMS memory
scheduling policy improves system throughput by 42\%
compared to First-Come-First-Serve (FCFS) and by 21\%
compared to First-Ready First-Come-First-Serve
(FR-FCFS) on an MPSoC for mobile phones. For the
improvement of fairness, HMS improves fairness by 1.52$
\times $ compared to FCFS and by 1.23$ \times $
compared to FR-FCFS. In the aspect of leakage
optimization, our memory access-aware power-gating
mechanism improves energy savings by 3.88$ \times $ and
reduces the performance penalty by 70\% compared to
conventional timeout-based power gating. We further
demonstrate that our HMS memory scheduler can regulate
memory access orders, thereby reducing memory response
time variation. This leads to more accurate power-down
decisions for both conventional timeout power gating
and the proposed memory access-aware power gating.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% The citation key keeps the ASCII spelling (Borgstrom) so that existing
%%% citations still resolve; only the author field carries the accents.
@Article{Borgstrom:2015:PCW,
author = "Johannes Borgstr{\"o}m and Ram{\=u}nas Gutkovas and
Ioana Rodhe and Bj{\"o}rn Victor",
title = "The Psi-Calculi Workbench: a Generic Tool for Applied
Process Calculi",
journal = j-TECS,
volume = "14",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2682570",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Psi-calculi is a parametric framework for extensions
of the pi-calculus with arbitrary data and logic. All
instances of the framework inherit machine-checked
proofs of the metatheory such as compositionality and
bisimulation congruence. We present a generic analysis
tool for psi-calculus instances, enabling symbolic
execution and (bi)simulation checking for both unicast
and broadcast communication. The tool also provides a
library for implementing new psi-calculus instances. We
provide examples from traditional communication
protocols and wireless sensor networks. We also
describe the theoretical foundations of the tool,
including an improved symbolic operational semantics,
with additional support for scoped broadcast
communication.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{VanHulst:2015:MSH,
  author          = {A. C. {Van Hulst} and M. A. Reniers and W. J. Fokkink},
  title           = {Maximal Synthesis for {Hennessy--Milner} Logic},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {10:1--10:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2680540},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article concerns the maximal synthesis for
    Hennessy--Milner Logic on Kripke structures with
    labeled transitions. We formally define, and prove the
    validity of, a theoretical framework that modifies a
    Kripke model to the least possible extent in order to
    satisfy a given HML formula. Applications of this work
    can be found in the field of controller synthesis and
    supervisory control for discrete-event systems.
    Synthesis is realized technically by first projecting
    the given Kripke model onto a bisimulation-equivalent
    partial tree representation, thereby unfolding up to
    the depth of the synthesized formula. Operational rules
    then define the required adaptations upon this
    structure in order to achieve validity of the
    synthesized formula. Synthesis might result in multiple
    valid adaptations, which are all related to the
    original model via simulation. Each simulant of the
    original Kripke model, which satisfies the synthesized
    formula, is also related to one of the synthesis
    results via simulation. This indicates maximality, or
    maximal permissiveness, in the context of supervisory
    control. In addition to the formal construction of
    synthesis as presented in this article, we present it
    in algorithmic form and analyze its computational
    complexity. Computer-verified proofs for two important
    theorems in this article have been created using the
    Coq proof assistant.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Boucheneb:2015:SST,
  author          = {Hanifa Boucheneb and Kamel Barkaoui},
  title           = {Stubborn Sets for Time {Petri} Nets},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {11:1--11:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2680541},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The main limitation of the verification approaches
    based on state enumeration is the state explosion
    problem. The partial order reduction techniques aim at
    attenuating this problem by reducing the number of
    transitions to be fired from each state while
    preserving properties of interest. Among the reduction
    techniques proposed in the literature, this article
    considers the stubborn set method of Petri nets and
    investigates its extension to time Petri nets. It
    establishes some useful sufficient conditions for
    stubborn sets, which preserve deadlocks and
    k-boundedness of places.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {11},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Pan:2015:HFY,
  author          = {Abhisek Pan and Rance Rodrigues and Sandip Kundu},
  title           = {A Hardware Framework for Yield and Reliability
    Enhancement in Chip Multiprocessors},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {12:1--12:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2629688},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Device reliability and manufacturability have emerged
    as dominant concerns in end-of-road CMOS devices. An
    increasing number of hardware failures are attributed
    to manufacturability or reliability problems.
    Maintaining an acceptable manufacturing yield for chips
    containing tens of billions of transistors with wide
    variations in device parameters has been identified as
    a great challenge. Additionally, today's nanometer
    scale devices suffer from accelerated aging effects
    because of the extreme operating temperature and
    electric fields they are subjected to. Unless addressed
    in design, aging-related defects can significantly
    reduce the lifetime of a product. In this article, we
    investigate a micro-architectural scheme for improving
    yield and reliability of homogeneous chip
    multiprocessors (CMPs). The proposed solution involves
    a hardware framework that enables us to utilize the
    redundancies inherent in a multicore system to keep the
    system operational in the face of partial failures. A
    micro-architectural modification allows a faulty core
    in a CMP to use another core's resources to service any
    instruction that the former cannot execute correctly by
    itself. This service improves yield and reliability but
    may cause loss of performance. The target platform for
    quantitative evaluation of performance under
    degradation is a dual-core and a quad-core chip
    multiprocessor with one or more cores sustaining
    partial failure. Simulation studies indicate that when
    a large, high-latency, and sparingly used unit such as
    a floating-point unit fails in a core, correct
    execution may be sustained through outsourcing with at
    most a 16\% impact on performance for a floating-point
    intensive application. For applications with moderate
    floating-point load, the degradation is insignificant.
    The performance impact may be mitigated even further by
    judicious selection of the cores to commandeer
    depending on the current load on each of the candidate
    cores. The area overhead is also negligible due to
    resource reuse.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {12},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Lazarescu:2015:ITB,
  author          = {Mihai T. Lazarescu and Luciano Lavagno},
  title           = {Interactive Trace-Based Analysis Toolset for Manual
    Parallelization of {C} Programs},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {13:1--13:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2638556},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Massive amounts of legacy sequential code need to be
    parallelized to make better use of modern
    multiprocessor architectures. Nevertheless, writing
    parallel programs is still a difficult task. Automated
    parallelization methods can be effective both at the
    statement and loop levels and, recently, at the task
    level, but they are still restricted to specific source
    code constructs or application domains. We present in
    this article an innovative toolset that supports
    developers when performing manual code analysis and
    parallelization decisions. It automatically collects
    and represents the program profile and data
    dependencies in an interactive graphical format that
    facilitates the analysis and discovery of manual
    parallelization opportunities. The toolset can be used
    for arbitrary sequential C programs and parallelization
    patterns. Also, its program-scope data dependency
    tracing at runtime can complement the tools based on
    static code analysis and can also benefit from it at
    the same time. We also tested the effectiveness of the
    toolset in terms of time to reach parallelization
    decisions and of their quality. We measured a
    significant improvement for several real-world
    representative applications.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {13},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Quan:2015:HTM,
  author          = {Wei Quan and Andy D. Pimentel},
  title           = {A Hybrid Task Mapping Algorithm for Heterogeneous
    {MPSoCs}},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {14:1--14:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2680542},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The application workloads in modern MPSoC-based
    embedded systems are becoming increasingly dynamic.
    Different applications concurrently execute and contend
    for resources in such systems, which could cause
    serious changes in the intensity and nature of the
    workload demands over time. To cope with the dynamism
    of application workloads at runtime and improve the
    efficiency of the underlying system architecture, this
    article presents a hybrid task mapping algorithm that
    combines a static mapping exploration and a dynamic
    mapping optimization to achieve an overall improvement
    of system efficiency. We evaluate our algorithm using a
    heterogeneous MPSoC system with three real
    applications. Experimental results reveal the
    effectiveness of our proposed algorithm by comparing
    derived solutions to the ones obtained from several
    other runtime mapping algorithms. In test cases with
    three simultaneously active applications, the mapping
    solutions derived by our approach have average
    performance improvements ranging from 45.9\% to 105.9\%
    and average energy savings ranging from 14.6\% to
    23.5\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {14},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Petrucci:2015:EET,
  author          = {Vinicius Petrucci and Orlando Loques and Daniel
    Moss{\'e} and Rami Melhem and Neven Abou Gazala and
    Sameh Gobriel},
  title           = {Energy-Efficient Thread Assignment Optimization for
    Heterogeneous Multicore Systems},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {15:1--15:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2566618},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The current trend to move from homogeneous to
    heterogeneous multicore systems provides compelling
    opportunities for achieving performance and energy
    efficiency goals. Running multiple threads in multicore
    systems poses challenges on meeting limited shared
    resources, such as memory bandwidth. We propose an
    optimization approach that includes an Integer Linear
    Programming (ILP) optimization model and a scheme to
    dynamically determine thread-to-core assignment. We
    present simulation analysis that shows energy savings
    and performance gains for a variety of workloads
    compared to state-of-the-art schemes. We implemented
    and evaluated a prototype of our thread assignment
    approach at user level, leveraging Linux scheduling and
    performance-monitoring capabilities.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {15},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Yang:2015:ESV,
  author          = {Zhengfeng Yang and Wang Lin and Min Wu},
  title           = {Exact Safety Verification of Hybrid Systems Based on
    Bilinear {SOS} Representation},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {16:1--16:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2629424},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we address the problem of safety
    verification of nonlinear hybrid systems. A hybrid
    symbolic-numeric method is presented to compute exact
    inequality invariants of hybrid systems efficiently.
    Some numerical invariants of a hybrid system can be
    obtained by solving a bilinear SOS programming via the
    PENBMI solver or iterative method, then the modified
    Newton refinement and rational vector recovery
    techniques are applied to obtain exact polynomial
    invariants with rational coefficients, which exactly
    satisfy the conditions of invariants. Experiments on
    some benchmarks are given to illustrate the efficiency
    of our algorithm.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {16},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Rodrigues:2015:DSE,
  author          = {Rance Rodrigues and Israel Koren and Sandip Kundu},
  title           = {Does the Sharing of Execution Units Improve
    Performance\slash Power of Multicores?},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {17:1--17:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2680543},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Several studies and recent real-world designs have
    promoted sharing of underutilized resources between
    cores in a multicore processor to achieve better
    performance/power. It has been argued that when
    utilization of such resources is low, sharing has a
    negligible impact on performance while offering
    considerable area and power benefits. In this article,
    we investigate the performance and performance/watt
    implications of sharing large and underutilized
    resources between pairs of cores in a multicore. We
    first study sharing of the entire floating-point
    datapath (including reservation stations and execution
    units) by two cores, similar to AMD's Bulldozer. We
    find that while this architecture results in power
    savings for certain workload combinations, it also
    results in significant performance loss of up to 28\%.
    Next, we study an alternative sharing architecture
    where only the floating-point execution units are
    shared, while the individual cores retain their
    reservation stations. This reduces the highest
    performance loss to 14\%. We then extend the study to
    include sharing of other large execution units that are
    used infrequently, namely, the integer multiply and
    divide units. Subsequently, we analyze the impact of
    sharing hardware resources in Simultaneously
    Multithreaded (SMT) processors where multiple threads
    run concurrently on the same core. We observe that
    sharing improves performance/watt at a negligible
    performance cost only if the shared units have high
    throughput. Sharing low-throughput units reduces both
    performance and performance/watt. To increase the
    throughput of the shared units, we propose the use of
    Dynamic Voltage and Frequency Boosting (DVFB) of only
    the shared units that can be placed on a separate
    voltage island. Our results indicate that the use of
    DVFB improves both performance and performance/watt by
    as much as 22\% and 10\%, respectively.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {17},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Diamantopoulos:2015:GPA,
author = "Dionysios Diamantopoulos and Kostas Siozios and
Sotirios Xydis and Dimitrios Soudris",
title = "{GENESIS}: Parallel Application Placement onto
Reconfigurable Architectures (Invited for the Special
Issue on Runtime Management)",
journal = j-TECS,
volume = "14",
number = "1",
pages = "18:1--18:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629651",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 22 06:25:23 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Placement is thought as the most time-consuming
processes in physical implementation flows for
reconfigurable architectures, while it highly affects
the quality of derived application implementation, as
it has impact on the maximum operating frequency.
Throughout this article, we propose a novel placer,
based on genetic algorithm, targeting to FPGAs. Rather
than relevant approaches, which are executed
sequentially, the new placer exhibits inherent
parallelism, which can benefit from multicore
processors. Experimental results prove the
effectiveness of this solution, as it achieves average
reduction of execution runtime and application's delay
by 67$ \times $ and 16\%, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pager:2015:SSM,
  author          = {Jared Pager and Reiley Jeyapaul and Aviral
    Shrivastava},
  title           = {A Software Scheme for Multithreading on {CGRAs}},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {19:1--19:??},
  month           = jan,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2638558},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Recent industry trends show a drastic rise in the use
    of hand-held embedded devices, from everyday
    applications to medical (e.g., monitoring devices) and
    critical defense applications (e.g., sensor nodes). The
    two key requirements in the design of such devices are
    their processing capabilities and battery life. There
    is therefore an urgency to build high-performance and
    power-efficient embedded devices, inspiring researchers
    to develop novel system designs for the same. The use
    of a coprocessor (application-specific hardware) to
    offload power-hungry computations is gaining favor
    among system designers to suit their power budgets. We
    propose the use of CGRAs (Coarse-Grained Reconfigurable
    Arrays) as a power-efficient coprocessor. Though CGRAs
    have been widely used for streaming applications, the
    extensive compiler support required limits its
    applicability and use as a general purpose coprocessor.
    In addition, a CGRA structure can efficiently execute
    only one statically scheduled kernel at a time, which
    is a serious limitation when used as an accelerator to
    a multithreaded or multitasking processor. In this
    work, we envision a multithreaded CGRA where multiple
    schedules (or kernels) can be executed simultaneously
    on the CGRA (as a coprocessor). We propose a
    comprehensive software scheme that transforms the
    traditionally single-threaded CGRA into a multithreaded
    coprocessor to be used as a power-efficient accelerator
    for multithreaded embedded processors. Our software
    scheme includes (1) a compiler framework that
    integrates with existing CGRA mapping techniques to
    prepare kernels for execution on the multithreaded CGRA
    and (2) a runtime mechanism that dynamically schedules
    multiple kernels (offloaded from the processor) to
    execute simultaneously on the CGRA coprocessor. Our
    multithreaded CGRA coprocessor implementation thus
    makes it possible to achieve improved power-efficient
    computing in modern multithreaded embedded systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Shukla:2015:EOS,
  author          = {Sandeep K. Shukla},
  title           = {Editorial: Oh Security --- Where Art Thou?},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {20:1--20:??},
  month           = mar,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2742044},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Rafiliu:2015:SOR,
  author          = {Sergiu Rafiliu and Petru Eles and Zebo Peng and
    Michael Lemmon},
  title           = {Stability of Online Resource Managers for Distributed
    Systems under Execution Time Variations},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {21:1--21:??},
  month           = mar,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2629495},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Today's embedded systems are exposed to variations in
    resource usage due to complex software applications,
    hardware platforms, and impact of the runtime
    environments. When these variations are large and
    efficiency is required, on-line resource managers may
    be deployed on the system to help it control its
    resource usage. An often neglected problem is whether
    these resource managers are stable, meaning that the
    resource usage is controlled under all possible
    scenarios. In distributed systems, this problem is
    particularly hard because applications distributed over
    many resources generate complex dependencies between
    their resources. In this article, we develop a
    mathematical model of the system, and derive conditions
    that, if satisfied, guarantee stability.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Loke:2015:MCS,
  author          = {Seng W. Loke and Keegan Napier and Abdulaziz Alali and
    Niroshinie Fernando and Wenny Rahayu},
  title           = {Mobile Computations with Surrounding Devices:
    Proximity Sensing and {MultiLayered} Work Stealing},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {22:1--22:??},
  month           = mar,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2656214},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With the proliferation of mobile devices, and their
    increasingly powerful embedded processors and storage,
    vast resources increasingly surround users. We have
    been investigating the concept of on-demand ad hoc
    forming of groups of nearby mobile devices in the midst
    of crowds to cooperatively perform computationally
    intensive tasks as a service to local mobile users, or
    what we call mobile crowd computing. As devices can
    vary in processing power and some can leave a group
    unexpectedly or new devices join in, there is a need
    for algorithms that can distribute work in a flexible
    manner and still work with different arrangements of
    devices that can arise in an ad hoc fashion. In this
    article, we first argue for the feasibility of such use
    of crowd-embedded computations using theoretical
    justifications and reporting on our experiments on
    Bluetooth-based proximity sensing. We then present a
    multilayered work-stealing style algorithm for
    distributing work efficiently among mobile devices and
    compare speedups attainable for different topologies of
    devices networked with Bluetooth, justifying a
    topology-flexible opportunistic approach. While our
    experiments are with Bluetooth and mobile devices, the
    approach is applicable to ecosystems of various
    embedded devices with powerful processors, networking
    technologies, and storage that will increasingly
    surround users.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Malik:2015:HRT,
  author          = {Avinash Malik and David Gregg},
  title           = {Heuristics on Reachability Trees for Bicriteria
    Scheduling of Stream Graphs on Heterogeneous
    Multiprocessor Architectures},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {23:1--23:??},
  month           = mar,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2638553},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we partition and schedule Synchronous
    Dataflow (SDF) graphs onto heterogeneous execution
    architectures in such a way as to minimize energy
    consumption and maximize throughput. Partitioning and
    scheduling SDF graphs onto homogeneous architectures is
    a well-known NP-hard problem. The heterogeneity of the
    execution architecture makes our problem exponentially
    challenging to solve. We model the problem as a
    weighted sum and solve it using novel state space
    exploration inspired from the theory of parallel
    automata. The resultant heuristic algorithm results in
    good scheduling when implemented in an existing stream
    framework.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {23},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Martin:2015:ROS,
author = "Paul Martin and Lucas Wanner and Mani Srivastava",
title = "Runtime Optimization of System Utility with Variable
Hardware",
journal = j-TECS,
volume = "14",
number = "2",
pages = "24:1--24:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2656338",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Increasing hardware variability in newer integrated
circuit fabrication technologies has caused
corresponding power variations on a large scale. These
variations are particularly exaggerated for idle power
consumption, motivating the need to mitigate the
effects of variability in systems whose operation is
dominated by long idle states with periodic active
states. In systems where computation is severely
limited by anemic energy reserves and where a long
overall system lifetime is desired, maximizing the
quality of a given application subject to these
constraints is both challenging and an important step
toward achieving high-quality deployments. This work
describes VaRTOS, an architecture and corresponding set
of operating system abstractions that provide explicit
treatment of both idle and active power variations for
tasks running in real-time operating systems. Tasks in
VaRTOS express elasticity by exposing individual knobs
--- shared variables that the operating system can tune
to adjust task quality and, correspondingly, task power,
maximizing application utility both on a per-task and
on a system-wide basis. We provide results regarding
online learning of instance-specific sleep power,
active power, and task-level power expenditure on
simulated hardware with demonstrated effects for
several prototypical applications. Our results on
networked sensing applications, which are
representative of a broader category of applications
that VaRTOS targets, show that VaRTOS can reduce
variability-induced energy expenditure errors from over
70\% in many cases to under 2\% in most cases and under
5\% in the worst case.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gomony:2015:RTM,
  author          = {Manil Dev Gomony and Benny Akesson and Kees Goossens},
  title           = {A Real-Time Multichannel Memory Controller and Optimal
    Mapping of Memory Clients to Memory Channels},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {25:1--25:??},
  month           = mar,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2661635},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Ever-increasing demands for main memory bandwidth and
    memory speed/power tradeoff led to the introduction of
    memories with multiple memory channels, such as Wide IO
    DRAM. Efficient utilization of a multichannel memory as
    a shared resource in multiprocessor real-time systems
    depends on mapping of the memory clients to the memory
    channels according to their requirements on latency,
    bandwidth, communication, and memory capacity. However,
    there is currently no real-time memory controller for
    multichannel memories, and there is no methodology to
    optimally configure multichannel memories in real-time
    systems. As a first work toward this direction, we
    present two main contributions in this article: (1) a
    configurable real-time multichannel memory controller
    architecture with a novel method for
    logical-to-physical address translation and (2) two
    design-time methods to map memory clients to the memory
    channels, one an optimal algorithm based on an integer
    programming formulation of the mapping problem, and the
    other a fast heuristic algorithm. We demonstrate the
    real-time guarantees on bandwidth and latency provided
    by our multichannel memory controller architecture by
    experimental evaluation. Furthermore, we compare the
    performance of the mapping problem formulation in a
    solver and the heuristic algorithm against two existing
    mapping algorithms in terms of computation time and
    mapping success ratio. We show that an optimal solution
    can be found in 2 hours using the solver and in less
    than 1 second with less than 7\% mapping failure using
    the heuristic for realistically sized problems.
    Finally, we demonstrate configuring a Wide IO DRAM in a
    high-definition (HD) video and graphics processing
    system to emphasize the practical applicability and
    effectiveness of this work.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Jezequel:2015:FPA,
author = "Lo{\"\i}g Jezequel and Eric Fabre and Victor
Khomenko",
title = "Factored Planning: From Automata to {Petri} Nets",
journal = j-TECS,
volume = "14",
number = "2",
pages = "26:1--26:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2656215",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Factored planning mitigates the state explosion
problem by avoiding the construction of the state space
of the whole system and instead working with the
system's components. Traditionally, finite automata
have been used to represent the components, with the
overall system being represented as their product. In
this article, we change the representation of
components to safe Petri nets. This allows one to use
cheap structural operations like transition
contractions to reduce the size of the Petri net before
its state space is generated, which often leads to
substantial savings compared with automata. The
proposed approach has been implemented and proved
efficient on several factored planning benchmarks. This
article is an extended version of our ACSD 2013 paper
[Jezequel et al. 2013], with the addition of the proofs
and the experimental results of Sections 6 and 7.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Taniuchi:2015:AUI,
author = "Daisuke Taniuchi and Takuya Maekawa",
title = "Automatic Update of Indoor Location Fingerprints with
Pedestrian Dead Reckoning",
journal = j-TECS,
volume = "14",
number = "2",
pages = "27:1--27:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2667226",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we propose a new method for
automatically updating a Wi-Fi indoor positioning model
on a cloud server by employing uploaded sensor data
obtained from the smartphone sensors of a specific user
who spends a lot of time in a given environment (e.g.,
a worker in the environment). In this work, we attempt
to track the user with pedestrian dead reckoning
techniques, and at the same time we obtain Wi-Fi scan
data from a mobile device possessed by the user. With
the scan data and the estimated coordinates uploaded to
a cloud server, we can automatically create a pair
consisting of a scan and its corresponding indoor
coordinates during the user's daily life and update an
indoor positioning model on the server by using the
information. With this approach, we try to cope with
the instability of Wi-Fi-based positioning methods
caused by changing environmental dynamics, that is,
layout changes and moving or removal of Wi-Fi access
points. Therefore, ordinary users (e.g., customers) who
do not have rich sensors can benefit from the
continually updating positioning model.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jimenez:2015:LSC,
author = "Xavier Jimenez and David Novo and Paolo Ienne",
title = "{Libra}: Software-Controlled Cell Bit-Density to
Balance Wear in {NAND} Flash",
journal = j-TECS,
volume = "14",
number = "2",
pages = "28:1--28:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2638552",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Hybrid flash storages combine a small Single-Level
Cell (SLC) partition with a large Multilevel Cell (MLC)
partition. Compared to MLC-only solutions, the SLC
partition exploits fast and short local write updates,
while the MLC part brings large capacity. On the whole,
hybrid storage achieves a tangible performance
improvement for a moderate extra cost. Yet, device
lifetime is an important aspect often overlooked: in a
hybrid system, a large ratio of writes may be directed
to the small SLC partition, thus generating a local
stress that could exhaust the SLC lifetime
significantly sooner than the MLC partition's. To
address this issue, we propose Libra, which builds on
flash storage made solely of MLC flash and uses the
memory devices in SLC mode when appropriate; that is,
we exploit the fact that writing a single bit per cell
in an MLC provides characteristics close to those of an
ordinary SLC. In our scheme, the cell bit-density of a
block can be decided dynamically by the flash
controller, and the physical location of the SLC
partition can now be moved around the whole device,
balancing wear across it. This article provides a
thorough analysis and characterization of the SLC mode
for MLCs and gives evidence that the inherent
flexibility provided by Libra simplifies considerably
the stress balance on the device. Overall, our
technique improves lifetime by up to one order of
magnitude at no cost when compared to any hybrid
storage that relies on a static SLC-MLC partitioning.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2015:PVL,
author = "Li-Pin Chang and Yo-Chuan Su and I-Chen Wu",
title = "Plugging Versus Logging: Adaptive Buffer Management
for Hybrid-Mapping {SSDs}",
journal = j-TECS,
volume = "14",
number = "2",
pages = "29:1--29:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629455",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A promising technique to improve the write performance
of solid-state disks (SSDs) is to use a disk write
buffer. The goal of a write buffer is not only to
reduce the write traffic to the flash chips but also to
convert host write patterns into long and sequential
write bursts. This study proposes a new buffer design
consisting of a replacement policy and a write-back
policy. The buffer monitors how the host workload
stresses the flash translation layer upon garbage
collection. This is used to dynamically adjust its
replacement and write-back strategies for a good
balance between write sequentiality and write
randomness. When the garbage collection overhead is
low, the write buffer favors high write sequentiality
over low write randomness. When the flash translation
layer observes a high overhead of garbage collection,
the write buffer favors low write randomness over high
write sequentiality. The proposed buffer design
outperformed existing approaches by up to 20\% under
various workloads and flash translation algorithms, as
will be shown in experiment results.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "29",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jia:2015:TAD,
author = "Zhiping Jia and Yang Li and Yi Wang and Meng Wang and
Zili Shao",
title = "Temperature-Aware Data Allocation for Embedded Systems
with Cache and Scratchpad Memory",
journal = j-TECS,
volume = "14",
number = "2",
pages = "30:1--30:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629650",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The hybrid memory architecture that contains both
on-chip cache and scratchpad memory (SPM) has been
widely used in embedded systems. In this article, we
explore this hybrid memory architecture by jointly
optimizing time performance and temperature for
embedded systems with loops. Our basic idea is to
adaptively adjust the workload distribution between
cache and SPM based on the current temperature. For a
problem in which the workload can be estimated a
priori, we present a nonlinear programming formulation
to optimally minimize the total execution time of a
loop under the constraints of SPM size and temperature.
To solve a problem in which the workload is not known a
priori, we propose a temperature-aware adaptive loop
scheduling algorithm called TALS to dynamically
allocate data to cache and SPM at runtime. The
experimental results show that our algorithms can
effectively achieve both performance and temperature
optimization for embedded systems with cache and SPM.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2015:MPA,
author = "Weihua Zhang and Jiaxin Li and Yi Li and Haibo Chen",
title = "Multilevel Phase Analysis",
journal = j-TECS,
volume = "14",
number = "2",
pages = "31:1--31:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629594",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Phase analysis, which classifies the set of execution
intervals with similar execution behavior and resource
requirements, has been widely used in a variety of
systems, including dynamic cache reconfiguration,
prefetching, race detection, and sampling simulation.
Although phase granularity has been a major factor in
the accuracy of phase analysis, it has not been well
investigated, and most systems usually adopt a
fine-grained scheme. However, such a scheme can only
take account of recent local phase information and
could be frequently interfered by temporary noise due
to instant phase changes, which might notably limit the
accuracy. In this article, we make the first
investigation on the potential of multilevel phase
analysis (MLPA), where different granularity phase
analyses are combined together to improve the overall
accuracy. The key observation is that the
coarse-grained intervals belonging to the same phase
usually consist of stably distributed fine-grained
phases. Moreover, the phase of a coarse-grained
interval can be accurately identified based on the
fine-grained intervals at the beginning of its
execution. Based on the observation, we design and
implement an MLPA scheme. In such a scheme, a
coarse-grained phase is first identified based on the
fine-grained intervals at the beginning of its
execution. The following fine-grained phases in it are
then predicted based on the sequence of fine-grained
phases in the coarse-grained phase. Experimental
results show that such a scheme can notably improve the
prediction accuracy. Using a Markov fine-grained phase
predictor as the baseline, MLPA can improve prediction
accuracy by 20\%, 39\%, and 29\% for next phase, phase
change, and phase length prediction for SPEC2000,
respectively, yet incur only about 2\% time overhead
and 40\% space overhead (about 360 bytes in total). To
demonstrate the effectiveness of MLPA, we apply it to a
dynamic cache reconfiguration system that dynamically
adjusts the cache size to reduce the power consumption
and access time of the data cache. Experimental results
show that MLPA can further reduce the average cache
size by 15\% compared to the fine-grained scheme.
Moreover, for MLPA, we also observe that coarse-grained
phases can better capture the overall program
characteristics with fewer phases and the last
representative phase could be classified in a very
early program position, leading to fewer execution
intervals being functionally simulated. Based on this
observation, we also design a multilevel sampling
simulation technique that combines both fine- and
coarse-grained phase analysis for sampling simulation.
Such a scheme uses fine-grained simulation points to
represent only the selected coarse-grained simulation
points instead of the entire program execution; thus,
it could further reduce both the functional and
detailed simulation time. Experimental results show
that MLPA for sampling simulation can achieve a speedup
in simulation time of about 8.3X with similar accuracy
compared to 10M SimPoint.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "31",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Banaiyanmofrad:2015:UFF,
author = "Abbas Banaiyanmofrad and Houman Homayoun and Nikil
Dutt",
title = "Using a Flexible Fault-Tolerant Cache to Improve
Reliability for Ultra Low Voltage Operation",
journal = j-TECS,
volume = "14",
number = "2",
pages = "32:1--32:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629566",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Caches are known to consume a large part of total
microprocessor power. Traditionally, voltage scaling
has been used to reduce both dynamic and leakage power
in caches. However, aggressive voltage reduction causes
process-variation--induced failures in cache SRAM
arrays, which compromise cache reliability. In this
article, we propose FFT-Cache, a flexible
fault-tolerant cache that uses a flexible defect map to
configure its architecture to achieve significant
reduction in energy consumption through aggressive
voltage scaling while maintaining high error
reliability. FFT-Cache uses a portion of faulty cache
blocks as redundancy---using block-level or line-level
replication within or between sets---to tolerate other
faulty caches lines and blocks. Our configuration
algorithm categorizes the cache lines based on degree
of conflict between their blocks to reduce the
granularity of redundancy replacement. FFT-Cache
thereby sacrifices a minimal number of cache lines to
avoid impacting performance while tolerating the
maximum amount of defects. Our experimental results on
a processor executing SPEC2K benchmarks demonstrate
that the operational voltage of both L1/L2 caches can
be reduced down to 375 mV, which achieves up to 80\%
reduction in the dynamic power and up to 48\% reduction
in the leakage power. This comes with only a small
performance loss ({$<$}5\%) and 13\% area overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "32",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Owaida:2015:EDS,
author = "Muhsen Owaida and Gabriel Falcao and Joao Andrade and
Christos Antonopoulos and Nikolaos Bellas and Madhura
Purnaprajna and David Novo and Georgios Karakonstantis
and Andreas Burg and Paolo Ienne",
title = "Enhancing Design Space Exploration by Extending
{CPU\slash GPU} Specifications onto {FPGAs}",
journal = j-TECS,
volume = "14",
number = "2",
pages = "33:1--33:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2656207",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The design cycle for complex special-purpose computing
systems is extremely costly and time-consuming. It
involves a multiparametric design space exploration for
optimization, followed by design verification.
Designers of special purpose VLSI implementations often
need to explore parameters, such as optimal bitwidth
and data representation, through time-consuming Monte
Carlo simulations. A prominent example of this
simulation-based exploration process is the design of
decoders for error correcting systems, such as the
Low-Density Parity-Check (LDPC) codes adopted by modern
communication standards, which involves thousands of
Monte Carlo runs for each design point. Currently,
high-performance computing offers a wide set of
acceleration options that range from multicore CPUs to
Graphics Processing Units (GPUs) and Field Programmable
Gate Arrays (FPGAs). The exploitation of diverse target
architectures is typically associated with developing
multiple code versions, often using distinct
programming paradigms. In this context, we evaluate the
concept of retargeting a single OpenCL program to
multiple platforms, thereby significantly reducing
design time. A single OpenCL-based parallel kernel is
used without modifications or code tuning on multicore
CPUs, GPUs, and FPGAs. We use SOpenCL (Silicon to
OpenCL), a tool that automatically converts OpenCL
kernels to RTL in order to introduce FPGAs as a
potential platform to efficiently execute simulations
coded in OpenCL. We use LDPC decoding simulations as a
case study. Experimental results were obtained by
testing a variety of regular and irregular LDPC codes
that range from short/medium (e.g., 8,000 bit) to long
length (e.g., 64,800 bit) DVB-S2 codes. We observe
that, depending on the design parameters to be
simulated, on the dimension and phase of the design,
the GPU or FPGA may suit different purposes more
conveniently, thus providing different acceleration
factors over conventional multicore CPUs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2015:TWA,
author = "Tianzheng Wang and Duo Liu and Yi Wang and Zili Shao",
title = "Towards Write-Activity-Aware Page Table Management for
Non-volatile Main Memories",
journal = j-TECS,
volume = "14",
number = "2",
pages = "34:1--34:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2697394",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Non-volatile memories such as phase change memory
(PCM) and memristor are being actively studied as an
alternative to DRAM-based main memory in embedded
systems because of their properties, which include low
power consumption and high density. Though PCM is one
of the most promising candidates with commercial
products available, its adoption has been greatly
compromised by limited write endurance. As main memory
is one of the most heavily accessed components, it is
critical to prolong the lifetime of PCM. In this
article, we present {Write-Activity-aware Page Table
Management} (WAPTM), a simple yet effective page table
management scheme for reducing unnecessary writes, by
redesigning system software and exploiting
write-activity-aware features provided by the hardware.
We implemented WAPTM in Google Android based on the ARM
architecture and evaluated it with real Android
applications. Experimental results show that WAPTM can
significantly reduce writes in page tables, proving the
feasibility and potential of prolonging the lifetime of
PCM-based main memory through reducing writes at the OS
level.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tsai:2015:JPI,
author = "Chun-Jen Tsai and Han-Wen Kuo and Zigang Lin and
Zi-Jing Guo and Jun-Fu Wang",
title = "A {Java} Processor {IP} Design for Embedded {SoC}",
journal = j-TECS,
volume = "14",
number = "2",
pages = "35:1--35:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629649",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2000.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we present a reusable Java processor
IP for application processors of embedded systems. For
the Java microarchitecture, we propose a low-cost stack
memory design that supports a two-fold instruction
folding pipeline and a low-complexity Java exception
handling hardware. We also propose a mapping between
the Java dynamic class loading model and the SoC
platform-based design principle so that the Java core
can be encapsulated as a reusable IP. To achieve this
goal, a two-level method area with two on-chip circular
buffers is proposed as an interface between the RISC
core and the Java core. The proposed architecture is
implemented on a Xilinx Virtex-5 FPGA device.
Experimental results show that its performance has some
advantages over other Java processors and a Java VM
with JIT acceleration on a PowerPC platform.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "35",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ttofis:2015:HEA,
author = "Christos Ttofis and Christos Kyrkou and Theocharis
Theocharides",
title = "A Hardware-Efficient Architecture for Accurate
Real-Time Disparity Map Estimation",
journal = j-TECS,
volume = "14",
number = "2",
pages = "36:1--36:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629699",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Emerging embedded vision systems utilize disparity
estimation as a means to perceive depth information to
intelligently interact with their host environment and
take appropriate actions. Such systems demand high
processing performance and accurate depth perception
while requiring low energy consumption, especially when
dealing with mobile and embedded applications, such as
robotics, navigation, and security. The majority of
real-time dedicated hardware implementations of
disparity estimation systems have adopted local
algorithms relying on simple cost aggregation
strategies with fixed and rectangular correlation
windows. However, such algorithms generally suffer from
significant ambiguity along depth borders and areas
with low texture. To this end, this article presents
the hardware architecture of a disparity estimation
system that enables good performance in both accuracy
and speed. The architecture implements an adaptive
support weight stereo correspondence algorithm that
integrates image segmentation information in an attempt
to increase the robustness of the matching process. The
article also presents hardware-oriented algorithmic
modifications/optimization techniques that make the
algorithm hardware-friendly and suitable for efficient
dedicated hardware implementation. A comparison to the
literature asserts that an FPGA implementation of the
proposed architecture is among the fastest
implementations in terms of million disparity
estimations per second (MDE/s), and with an overall
accuracy of 90.21\%, it presents an effective
processing speed/disparity map accuracy trade-off.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Peon-quiros:2015:PLD,
author = "Miguel Pe{\'o}n-Quir{\'o}s and Alexandros Bartzas and
Stylianos Mamagkakis and Francky Catthoor and Jos{\'e}
Manuel Mend{\'\i}as and Dimitrios Soudris",
title = "Placement of Linked Dynamic Data Structures over
Heterogeneous Memories in Embedded Systems",
journal = j-TECS,
volume = "14",
number = "2",
pages = "37:1--37:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2656208",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Software applications use dynamic memory (allocated
and deallocated in the system's heap) to handle
dynamism in their working conditions. Embedded systems
tend to include complex memory organizations but most
techniques for dynamic memory management do not deal
with the placement of data objects in physical memory
modules. Additionally, the performance of
hardware-controlled cache memories may be severely
hindered when used with linked data structures. We
therefore present a methodology to map dynamic data on
the multilevel memory subsystem of embedded systems,
taking advantage of any available memories (e.g.,
on-chip SRAMs) and avoiding interference with the cache
memories. The resulting data placement uses an
exclusive memory model and is compatible with existing
techniques for managing static data. Our methodology
helps the designer achieve reductions in energy
consumption and execution time that can be obtained by
an expert in an automated way while keeping control
over the process through multiple configuration
knobs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Segarra:2015:ASP,
author = "Juan Segarra and Clemente Rodr{\'\i}guez and Rub{\'e}n
Gran and Luis C. Aparicio and V{\'\i}ctor Vi{\~n}als",
title = "{ACDC}: Small, Predictable and High-Performance Data
Cache",
journal = j-TECS,
volume = "14",
number = "2",
pages = "38:1--38:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2677093",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In multitasking real-time systems, the worst-case
execution time (WCET) of each task and also the effects
of interferences between tasks in the worst-case
scenario need to be calculated. This is especially
complex in the presence of data caches. In this
article, we propose a small instruction-driven data
cache (256 bytes) that effectively exploits locality.
It works by preselecting a subset of memory
instructions that will have data cache replacement
permission. Selection of such instructions is based on
data reuse theory. Since each selected memory
instruction replaces its own data cache line, it
prevents pollution and performance in tasks becomes
independent of the size of the associated data
structures. We have modeled several memory
configurations using the Lock-MS WCET analysis method.
Our results show that, on average, our data cache
effectively services 88\% of program data of the tested
benchmarks. Such results double the worst-case
performance of our tested multitasking experiments. In
addition, in the worst case, they reach between 75\%
and 89\% of the ideal case of always hitting in
instruction and data caches. As well, we show that
using partitioning on our proposed hardware only
provides marginal benefits in worst-case performance,
so using partitioning is discouraged. Finally, we study
the viability of our proposal in the MiBench
application suite by characterizing its data reuse,
achieving hit ratios beyond 90\% in most programs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bellasi:2015:ERR,
author = "Patrick Bellasi and Giuseppe Massari and William
Fornaciari",
title = "Effective Runtime Resource Management Using {Linux}
Control Groups with the {BarbequeRTRM} Framework",
journal = j-TECS,
volume = "14",
number = "2",
pages = "39:1--39:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2658990",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Mar 26 05:58:56 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "The extremely high technology process reached by
silicon manufacturing (smaller than 32nm) has led to
production of computational platforms and SoC,
featuring a considerable amount of resources. Whereas
from one side such multi- and many-core platforms show
growing performance capabilities, from the other side
they are more and more affected by power, thermal, and
reliability issues. Moreover, the increased
computational capabilities allow congested usage
scenarios with workloads subject to mixed and
time-varying requirements. Effective usage of the
resources should take into account both the application
requirements and resources availability, with an
arbiter, namely a resource manager in charge to solve
the resource contention among demanding applications.
Current operating systems (OS) have only a limited
knowledge about application-specific behaviors and
their time-varying requirements. Dedicated system
interfaces to collect such inputs and forward them to
the OS (e.g., its scheduler) are thus an interesting
research area that aims at integrating the OS with an
ad hoc resource manager. Such a component can exploit
efficient low-level OS interfaces and mechanisms to
extend its capabilities of controlling tasks and system
resources. Because of the specific tasks and timings of
a resource manager, this component can be easily and
effectively developed as a user-space extension lying
in between the OS and the controlled application. This
article, which focuses on multicore Linux systems,
shows a portable solution to enforce runtime resource
management decisions based on the standard control
groups framework. A burst and a mixed workload
analysis, performed on a multicore-based NUMA platform,
have reported some promising results both in terms of
performance and power saving.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schaumont:2015:IEP,
author = "Patrick Schaumont and M{\'a}ire O'Neill and Tim
G{\"u}neysu",
title = "Introduction for Embedded Platforms for Cryptography
in the Coming Decade",
journal = j-TECS,
volume = "14",
number = "3",
pages = "40:1--40:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2745710",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Apr 21 17:21:32 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2015:ESD,
author = "Sandeep K. Shukla",
title = "Editorial: Schizoid Design for Critical Embedded
Systems",
journal = j-TECS,
volume = "14",
number = "3",
pages = "40e:1--40e:??",
month = may,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2761728",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Dec 9 08:08:56 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40e",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Howe:2015:PLB,
  author          = {James Howe and Thomas P{\"o}ppelmann and M{\'a}ire O'Neill
    and Elizabeth O'Sullivan and Tim G{\"u}neysu},
  title           = {Practical Lattice-Based Digital Signature Schemes},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {41:1--41:??},
  month           = apr,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2724713},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Apr 21 17:21:32 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Digital signatures are an important primitive for building
    secure systems and are used in most real-world security protocols. However,
    almost all popular signature schemes are either based on the factoring
    assumption (RSA) or the hardness of the discrete logarithm problem
    (DSA/ECDSA). In the case of classical cryptanalytic advances or progress on
    the development of quantum computers, the hardness of these closely related
    problems might be seriously weakened. A potential alternative approach is
    the construction of signature schemes based on the hardness of certain
    lattice problems that are assumed to be intractable by quantum computers.
    Due to significant research advancements in recent years, lattice-based
    schemes have now become practical and appear to be a very viable
    alternative to number-theoretic cryptography. In this article, we focus on
    recent developments and the current state of the art in lattice-based
    digital signatures and provide a comprehensive survey discussing signature
    schemes with respect to practicality. Additionally, we discuss future
    research areas that are essential for the continued development of
    lattice-based cryptography.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {41},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Boorghany:2015:CIL,
  author          = {Ahmad Boorghany and Siavash Bayat Sarmadi and Rasool
    Jalili},
  title           = {On Constrained Implementation of Lattice-Based
    Cryptographic Primitives and Schemes on Smart Cards},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {42:1--42:??},
  month           = apr,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700078},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Apr 21 17:21:32 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Most lattice-based cryptographic schemes with a security
    proof suffer from large key sizes and heavy computations. This is also true
    for the simpler case of authentication protocols that are used on smart
    cards as a very-constrained computing environment. Recent progress on ideal
    lattices has significantly improved the efficiency and made it possible to
    implement practical lattice-based cryptography on constrained devices.
    However, to the best of our knowledge, no previous attempts have been made
    to implement lattice-based schemes on smart cards. In this article, we
    provide the results of our implementation of several state-of-the-art
    lattice-based authentication protocols on smart cards and a microcontroller
    widely used in smart cards. Our results show that only a few of the
    proposed lattice-based authentication protocols can be implemented using
    limited resources of such constrained devices; however, cutting-edge ones
    are suitably efficient to be used practically on smart cards. Moreover, we
    have implemented fast Fourier transform (FFT) and discrete Gaussian
    sampling with different typical parameter sets, as well as versatile
    lattice-based public-key encryptions. These results have noticeable points
    that help to design or optimize lattice-based schemes for constrained
    devices.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {42},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Aysu:2015:FRT,
  author          = {Aydin Aysu and Bilgiday Yuce and Patrick Schaumont},
  title           = {The Future of Real-Time Security: Latency-Optimized
    Lattice-Based Digital Signatures},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {43:1--43:??},
  month           = may,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2724714},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Dec 9 08:08:56 MST 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Advances in quantum computing have spurred a significant
    amount of research into public-key cryptographic algorithms that are
    resistant against postquantum cryptanalysis. Lattice-based cryptography is
    one of the important candidates because of its reasonable complexity
    combined with reasonable signature sizes. However, in a postquantum world,
    not only the cryptography will change but also the computing platforms.
    Large amounts of resource-constrained embedded systems will connect to a
    cloud of powerful server computers. We present an optimization technique
    for lattice-based signature generation on such embedded systems; our goal
    is to optimize latency rather than throughput. Indeed, on an embedded
    system, the latency of a single signature for user identification or
    message authentication is more important than the aggregate signature
    generation rate. We build a high-performance implementation using
    hardware\slash software codesign techniques. The key idea is to partition
    the signature generation scheme into offline and online phases. The
    signature scheme allows this separation because a large portion of the
    computation does not depend on the message to be signed and can be handled
    before the message is given. Then, we can map complex precomputation
    operations in software on a low-cost processor and utilize hardware
    resources to accelerate simpler online operations. To find the optimum
    hardware architecture for the target platform, we define and explore the
    design space and implement two design configurations. We realize our
    solutions on the Altera Cyclone-IV CGX150 FPGA. The implementation consists
    of a NIOS soft-core processor and a low-latency hash and polynomial
    multiplication engine. On average, the proposed low-latency architecture
    can generate a signature with a latency of 96 clock cycles at 40MHz,
    resulting in a response time of 2.4 $ \mu $s for a signing request. On
    equivalent platforms, this corresponds to a performance improvement of 33
    and 105 times compared to previous hardware and software implementations,
    respectively.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {43},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{VonMaurich:2015:IQM,
  author          = {Ingo {Von Maurich} and Tobias Oder and Tim G{\"u}neysu},
  title           = {Implementing {QC--MDPC} {McEliece} Encryption},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {44:1--44:??},
  month           = apr,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700102},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Apr 21 17:21:32 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With respect to performance, asymmetric code-based
    cryptography based on binary Goppa codes has been reported as a highly
    interesting alternative to RSA and ECC. A major drawback is still the large
    keys in the range between 50 and 100KB that prevented real-world
    applications of code-based cryptosystems so far. A recent proposal by
    Misoczki et al. showed that quasi-cyclic moderate-density parity-check
    (QC-MDPC) codes can be used in McEliece encryption, reducing the public key
    to just 0.6KB to achieve an 80-bit security level. In this article, we
    provide optimized decoding techniques for MDPC codes and survey several
    efficient implementations of the QC-MDPC McEliece cryptosystem. This
    includes high-speed and lightweight architectures for reconfigurable
    hardware, efficient coding styles for ARM's Cortex-M4 microcontroller, and
    novel high-performance software implementations that fully employ vector
    instructions. Finally, we conclude that McEliece encryption in combination
    with QC-MDPC codes not only enables high-performance implementations but
    also allows for lightweight designs on a wide range of different
    platforms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {44},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Massolino:2015:OSC,
author = "Pedro Maat C. Massolino and Paulo S. L. M. Barreto and
Wilson V. Ruggiero",
title = "Optimized and Scalable Co-Processor for {McEliece}
with Binary {Goppa} Codes",
journal = j-TECS,
volume = "14",
number = "3",
pages = "45:1--45:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2736284",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Apr 21 17:21:32 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Asymmetric cryptographic primitives are essential to
enable secure communications in public networks or
public mediums. Such primitives can be deployed as
software libraries or hardware co-processors, the
latter being more commonly employed in systems on chip
(SoC) scenarios, embedded devices, or
application-specific servers. Unfortunately, the most
commonly available solutions, based on RSA or elliptic
curve cryptography (ECC), are highly processing
intensive due to the underlying extended-precision
modular arithmetic. Consequently, they are not
available on highly constrained platforms. Aiming to
tackle this issue, we here investigate an alternative
asymmetric encryption scheme that relies on lightweight
arithmetic: McEliece. This scheme is especially
appealing because, being based on error correction
codes, it displays a simpler arithmetic and leads to
better performance when compared to RSA or ECC. To
evaluate the implementation of this scheme in hardware,
we propose and analyze a flexible architecture whose
security level and time versus area usage
characteristics can be reconfigured as desired. The
proposed architecture is suitable to all usual security
levels, ranging from 80 to 256 bits. It is also very
efficient, being able to perform data decryption with
binary Goppa codes in 56$ \mu $s with 3,402 slices on
a Xilinx Spartan-3AN FPGA, whereas the best-known
result in the literature for the same FPGA is 115$ \mu
$s with 7,331 slices. Alternatively, the architecture
can operate with quasi-dyadic Goppa (QD-Goppa) codes,
which involves smaller keys than traditional binary
Goppa codes. In the latter case, for an 80-bit security
level, the decryption operation can take from 1.1ms
with 1,129 slices to 68$ \mu $s with 8,268 slices. By
choosing a more hardware-friendly decoding algorithm,
focusing hardware resources on most bottleneck
operations and sharing hardware resource for two
different algorithms, better results than those in
the literature were obtained.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Millo:2015:MAD,
author = "Jean-Vivien Millo and Emilien Kofman and Robert {De
Simone}",
title = "Modeling and Analyzing Dataflow Applications on
{NoC}-Based Many-Core Architectures",
journal = j-TECS,
volume = "14",
number = "3",
pages = "46:1--46:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700081",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Apr 21 17:21:32 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The advent of chip-level parallel architectures
prompted a renewal of interest into dataflow process
networks. The trend is to model an application
independently from the architecture, then the model is
morphed to best fit the target architecture. One
downplayed aspect is the mapping of communications
through the on-chip topology. The cost of such
communications is often prevalent with regard to
computations. This article establishes a dataflow
process network called K-periodically Routed Graph
(KRG), which serves the role of representing the
various routing decisions during the transformation of
a genuine application into an architecture-aware version
for this application.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Davis:2015:GPM,
  author          = {Robert I. Davis and Alan Burns and Jose Marinho and
    Vincent Nelis and Stefan M. Petters and Marko Bertogna},
  title           = {Global and Partitioned Multiprocessor Fixed Priority
    Scheduling with Deferred Preemption},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {47:1--47:??},
  month           = apr,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2739954},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Apr 21 17:21:32 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article introduces schedulability analysis for Global
    Fixed Priority Scheduling with Deferred Preemption (gFPDS) for homogeneous
    multiprocessor systems. gFPDS is a superset of Global Fixed Priority
    Preemptive Scheduling (gFPPS) and Global Fixed Priority Nonpreemptive
    Scheduling (gFPNS). We show how schedulability can be improved using gFPDS
    via appropriate choice of priority assignment and final nonpreemptive
    region lengths, and provide algorithms that optimize schedulability in this
    way. Via an experimental evaluation we compare the performance of
    multiprocessor scheduling using global approaches: gFPDS, gFPPS, and gFPNS,
    and also partitioned approaches employing FPDS, FPPS, and FPNS on each
    processor.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {47},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Tilli:2015:GCR,
  author          = {Andrea Tilli and Andrea Bartolini and Matteo Cacciari and
    Luca Benini},
  title           = {Guaranteed Computational Resprinting via Model-Predictive
    Control},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {48:1--48:??},
  month           = apr,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2724715},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Apr 21 17:21:32 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Today and future many-core systems are facing the
    utilization wall and dark silicon problems, for which not all the
    processing engines can be powered at the same time as this will lead to a
    power consumption higher than the Total Design Power (TDP) budget.
    Recently, computational sprinting approaches addressed the problem by
    exploiting the intrinsic thermal capacitance of the chip and the properties
    of common applications, which require intense, but temporary, use of
    resources. The thermal capacitance, possibly augmented with phase change
    materials, enables the temporary activation of all the resources
    simultaneously, although they largely exceed the steady-state thermal
    design power. In this article, we present an innovative and low-overhead
    hierarchical model-predictive controller for managing thermally safe
    sprinting with predictable resprinting rate, which ensures the correct
    execution of mixed-criticality tasks. Well-targeted simulations, also based
    on real workload benchmarks, show the applicability and the effectiveness
    of our solution.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {48},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Sayyah:2015:VPB,
  author          = {Parinaz Sayyah and Mihai T. Lazarescu and Sara Bocchio and
    Emad Ebeid and Gianluca Palermo and Davide Quaglia and Alberto Rosti and
    Luciano Lavagno},
  title           = {Virtual Platform-Based Design Space Exploration of
    Power-Efficient Distributed Embedded Applications},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {49:1--49:??},
  month           = may,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2723161},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Dec 9 08:08:56 MST 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Networked embedded systems are essential building blocks
    of a broad variety of distributed applications ranging from agriculture to
    industrial automation to healthcare and more. These often require specific
    energy optimizations to increase the battery lifetime or to operate using
    energy harvested from the environment. Since a dominant portion of power
    consumption is determined and managed by software, the software development
    process must have access to the sophisticated power management mechanisms
    provided by state-of-the-art hardware platforms to achieve the best
    tradeoff between system availability and reactivity. Furthermore, internode
    communications must be considered to properly assess the energy
    consumption. This article describes a design flow based on a SystemC
    virtual platform including both accurate power models of the hardware
    components and a fast abstract model of the wireless network. The platform
    allows both model-driven design of the application and the exploration of
    power and network management alternatives. These can be evaluated in
    different network scenarios, allowing one to exploit power optimization
    strategies without requiring expensive field trials. The effectiveness of
    the approach is demonstrated via experiments on a wireless body area
    network application.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {49},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Tamas-Selicean:2015:DOM,
  author          = {Domitian Tamas-Selicean and Paul Pop},
  title           = {Design Optimization of Mixed-Criticality Real-Time
    Embedded Systems},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {50:1--50:??},
  month           = may,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700103},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Dec 9 08:08:56 MST 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we are interested in implementing
    mixed-criticality real-time embedded applications on a given heterogeneous
    distributed architecture. Applications have different criticality levels,
    captured by their Safety-Integrity Level (SIL), and are scheduled using
    static-cyclic scheduling. According to certification standards,
    mixed-criticality tasks can be integrated onto the same architecture only
    if there is enough spatial and temporal separation among them. We consider
    that the separation is provided by partitioning, such that applications run
    in separate partitions, and each partition is allocated several time slots
    on a processor. Tasks of different SILs can share a partition only if they
    are all elevated to the highest SIL among them. Such elevation leads to
    increased development costs, which increase dramatically with each SIL.
    Tasks of higher SILs can be decomposed into redundant structures of lower
    SIL tasks. We are interested to determine (i) the mapping of tasks to
    processors, (ii) the assignment of tasks to partitions, (iii) the
    decomposition of tasks into redundant lower SIL tasks, (iv) the sequence
    and size of the partition time slots on each processor, and (v) the
    schedule tables, such that all the applications are schedulable and the
    development costs are minimized. We have proposed a Tabu Search-based
    approach to solve this optimization problem. The proposed algorithm has
    been evaluated using several synthetic and real-life benchmarks.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {50},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Papagiannopoulou:2015:EEH,
  author          = {Dimitra Papagiannopoulou and Giuseppe Capodanno and Tali
    Moreshet and Maurice Herlihy and R. Iris Bahar},
  title           = {Energy-Efficient and High-Performance Lock Speculation
    Hardware for Embedded Multicore Systems},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {51:1--51:??},
  month           = may,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700097},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Dec 9 08:08:56 MST 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Embedded systems are becoming increasingly common in
    everyday life and like their general-purpose counterparts, they have
    shifted towards shared memory multicore architectures. However, they are
    much more resource constrained, and as they often run on batteries, energy
    efficiency becomes critically important. In such systems, achieving high
    concurrency is a key demand for delivering satisfactory performance at low
    energy cost. In order to achieve this high concurrency, consistency across
    the shared memory hierarchy must be accomplished in a cost-effective manner
    in terms of performance, energy, and implementation complexity. In this
    article, we propose Embedded-Spec, a hardware solution for supporting
    transparent lock speculation, without the requirement for special
    supporting instructions. Using this approach, we evaluate the energy
    consumption and performance of a suite of benchmarks, exploring a range of
    contention management and retry policies. We conclude that for
    resource-constrained platforms, lock speculation can provide real benefits
    in terms of improved concurrency and energy efficiency, as long as the
    underlying hardware support is carefully configured.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {51},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Santinelli:2015:PCP,
  author          = {Luca Santinelli and Liliana Cucu-Grosjean},
  title           = {A Probabilistic Calculus for Probabilistic Real-Time
    Systems},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {52:1--52:??},
  month           = may,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2717113},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Dec 9 08:08:56 MST 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Challenges within real-time research are mostly in terms
    of modeling and analyzing the complexity of actual real-time embedded
    systems. Probabilities are effective in both modeling and analyzing
    embedded systems by increasing the amount of information for the
    description of elements composing the system. Elements are tasks and
    applications that need resources, schedulers that execute tasks, and
    resource provisioning that satisfies the resource demand. In this work, we
    present a model that considers component-based real-time systems with
    component interfaces able to abstract both the functional and nonfunctional
    requirements of components and the system. Our model faces probabilities
    and probabilistic real-time systems unifying in the same framework
    probabilistic scheduling techniques and compositional guarantees varying
    from soft to hard real time. We provide an algebra to work with the
    probabilistic notation developed and form an analysis in terms of
    sufficient probabilistic schedulability conditions for task systems with
    either preemptive fixed-priority or earliest deadline first scheduling
    paradigms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {52},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Anand:2015:ICL,
author = "Kapil Anand and Rajeev Barua",
title = "Instruction-Cache Locking for Improving Embedded
Systems Performance",
journal = j-TECS,
volume = "14",
number = "3",
pages = "53:1--53:??",
month = apr,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700100",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Apr 21 17:21:32 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cache memories in embedded systems play an important
role in reducing the execution time of applications.
Various kinds of extensions have been added to cache
hardware to enable software involvement in replacement
decisions, improving the runtime over a purely
hardware-managed cache. Novel embedded systems, such as
Intel's XScale and ARM Cortex processors, facilitate
locking one or more lines in cache; this feature is
called cache locking. We present a method for
instruction-cache locking that is able to reduce the
average-case runtime of a program. We demonstrate that
the optimal solution for instruction cache locking can
be obtained in polynomial time. However, a fundamental
lack of correlation between cache hardware and software
program points renders such optimal solutions
impractical. Instead, we propose two practical
heuristics-based approaches to achieve cache locking.
First, we present a static mechanism for locking the
cache, in which the locked contents of the cache are
kept fixed over the execution of the program. Next, we
present a dynamic mechanism that accounts for changing
program requirements at runtime. We devise a
cost--benefit model to discover the memory addresses
that should be locked in the cache. We implement our
scheme inside a binary rewriter, widening the
applicability of our scheme to binaries compiled using
any compiler. Results obtained on a suite of MiBench
benchmarks show that our static mechanism results in
20\% improvement in the instruction-cache miss rate on
average and up to 18\% improvement in the execution
time on average for applications having instruction
accesses as a bottleneck, compared to no cache locking.
The dynamic mechanism improves the cache miss rate by
35\% on average and execution time by 32\% on
instruction-cache-constrained applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "53",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cooke:2015:FSM,
  author          = {Patrick Cooke and Lu Hao and Greg Stitt},
  title           = {Finite-State-Machine Overlay Architectures for Fast
    {FPGA} Compilation and Application Portability},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {54:1--54:??},
  month           = apr,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700082},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Apr 21 17:21:32 MDT 2015},
  bibsource       = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Despite significant advantages, wider usage of
    field-programmable gate arrays (FPGAs) has been limited by lengthy
    compilation and a lack of portability. Virtual-architecture overlays have
    partially addressed these problems, but previous work focuses mainly on
    heavily pipelined applications with minimal control requirements. We expand
    previous work by enabling more flexible control via overlay architectures
    for finite-state machines. Although not appropriate for control-intensive
    circuits, the presented architectures reduced compilation times of control
    changes in a convolution case study from 7 hours to less than 1 second,
    with no performance overhead and an area overhead of 0.2\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {54},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Watkins:2015:UNT,
  author          = {Lanier Watkins and William H. Robinson and Raheem Beyah},
  title           = {Using Network Traffic to Infer Hardware State: a
    Kernel-Level Investigation},
  journal         = j-TECS,
  volume          = 14,
  number          = 3,
  pages           = {55:1--55:??},
  month           = may,
  year            = 2015,
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700094},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Dec 9 08:08:56 MST 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we illustrate that the boundary of a
    general-purpose node can be extended into the network by extracting
    information from network traffic generated by that general-purpose node to
    infer the state of its hardware components. This information is represented
    in a delay signature latent within the network traffic. In contrast, the
    traditional approach to determine the internal state of a node's resources
    meant that a software application with internal processes had to be
    resident on the node. The aforementioned delay signature is the keystone
    that provides a correlation between network traffic and the internal state
    of the source node. We characterize this delay signature by (1) identifying
    the different types of assembly language instructions that source this
    delay and (2) describing how architectural techniques, such as instruction
    pipelining and caching, give rise to this delay signature. In theory,
    highly utilized nodes (due to multiple threads) will contain excessive
    context switching and contention for shared resources. One important shared
    resource is main memory, and excessive use of this resource by applications
    and internal processes eventually leads to a decrease in cache efficiency
    that eventually stalls the instruction pipeline. Our results support this
    theory; specifically, we have observed that excessive context switching in
    active applications increases the effective memory access time and wastes
    precious CPU cycles, thus adding additional delay to the execution of load,
    store, and other instructions. Because the operating system (OS) kernel
    accesses memory to send network packets, the delay signature is induced
    into network traffic in situations where user-level utilization is high. We
    demonstrate this theory in two case studies: (1) resource discovery in
    cluster grids and (2) network-based detection of bitcoin mining on
    compromised nodes.},
  acknowledgement = ack-nhfb,
  acmid           = {2700094},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {55},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {LEON4 processor, clusters assembly language instructions,
    grid computing, passive resource discovery},
  pagecount       = {22},
}
@Article{Kerrison:2015:EMS,
  author =       "Steve Kerrison and Kerstin Eder",
  title =        "Energy Modeling of Software for a Hardware
                 Multithreaded Embedded Microprocessor",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "56:1--56:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700104",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article examines a hardware multithreaded
                 microprocessor and discusses the impact such an
                 architecture has on existing software energy modeling
                 techniques. A framework is constructed for analyzing
                 the energy behavior of the XMOS XS1-L multithreaded
                 processor and a variation on existing software energy
                 models is proposed, based on analysis of collected
                 energy data. It is shown that by combining execution
                 statistics with sufficient data on the processor's
                 thread activity and instruction execution costs, a
                 multithreaded software energy model used with
                 Instruction Set Simulation can yield an average error
                 margin of less than 7\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Cilardo:2015:ECA,
  author =       "Alessandro Cilardo and Edoardo Fusella and Luca Gallo
                 and Antonino Mazzeo",
  title =        "Exploiting Concurrency for the Automated Synthesis of
                 {MPSoC} Interconnects",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "57:1--57:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700075",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multiprocessor Systems-on-Chip (MPSoC) applications
                 can rely today on a very large spectrum of
                 interconnection topologies potentially meeting given
                 communication requirements, determining various
                 trade-offs between cost and performance. Building
                 interconnects that enable concurrent communication
                 tasks introduces decisive opportunities for reducing
                 the overall communication latency. This work identifies
                 three levels of parallelism at the interconnect level:
                 global parallelism across different independent
                 domains; local or intradomain parallelism, relying on
                 inherently concurrent interconnect components such as
                 crossbars; and interdomain parallelism, where multiple
                 concurrent paths across different local domains are
                 exploited. We propose an automated methodology to
                 search the design space, aimed at maximizing the
                 exploitation of these forms of parallelism. The
                 approach also takes into consideration possible
                 dependencies between communication tasks, which further
                 constrains the design space, making the identification
                 of a feasible solution more challenging. By jointly
                 solving a scheduling and interconnect synthesis
                 problem, the methodology turns the description of the
                 application communication requirements, including data
                 dependencies, into an on-chip synthesizable
                 interconnection structure along with a communication
                 schedule satisfying given area constraints. The article
                 thoroughly describes the formalisms and the methodology
                 used to derive such optimized heterogeneous topologies.
                 It also discusses some case studies emphasizing the
                 impact of the proposed approach and highlighting the
                 essential differences with a few other solutions
                 presented in the technical literature.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Geeraerts:2015:VCA,
  author =       "Gilles Geeraerts and Alexander Heu{\ss}ner and
                 Jean-Fran{\c{c}}ois Raskin",
  title =        "On the Verification of Concurrent, Asynchronous
                 Programs with Waiting Queues",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "58:1--58:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700072",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recently, new libraries, such as Grand Central
                 Dispatch (GCD), have been proposed to directly harness
                 the power of multicore platforms and to make the
                 development of concurrent software more accessible to
                 software engineers. When using such a library, the
                 programmer writes so-called blocks, which are chunks of
                 code, and dispatches them using synchronous or
                 asynchronous calls to several types of waiting queues.
                 A scheduler is then responsible for dispatching those
                 blocks among the available cores. Blocks can
                 synchronize via a global memory. In this article, we
                 propose Queue-Dispatch Asynchronous Systems as a
                 mathematical model that faithfully formalizes the
                 synchronization mechanisms and behavior of the
                 scheduler in those systems. We study in detail their
                 relationships to classical formalisms such as pushdown
                 systems, Petri nets, Fifo systems, and counter systems.
                 Our main technical contributions are precise worst-case
                 complexity results for the Parikh coverability problem
                 and the termination problem for several subclasses of
                 our model. We also consider an extension of Qdas with a
                 fork-join mechanism. Adding fork-join to any of the
                 subclasses that we have identified leads to
                 undecidability of the coverability problem. This
                 motivates the study of over-approximations. Finally, we
                 consider handmade abstractions as a practical way of
                 verifying programs that cannot be faithfully modeled by
                 decidable subclasses of Qdas.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2015:COM,
  author =       "Kai Huang and Min Yu and Rongjie Yan and Xiaomeng
                 Zhang and Xiaolang Yan and Lisane Brisolara and Ahmed
                 Amine Jerraya and Jiong Feng",
  title =        "Communication Optimizations for Multithreaded Code
                 Generation from {Simulink} Models",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "59:1--59:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2644811",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Communication frequency is increasing with the growing
                 complexity of emerging embedded applications and the
                 number of processors in the implemented multiprocessor
                 SoC architectures. In this article, we consider the
                 issue of communication cost reduction during
                 multithreaded code generation from partitioned Simulink
                 models to help designers in code optimization to
                 improve system performance. We first propose a
                 technique combining message aggregation and
                 communication pipeline methods, which groups
                 communications with the same destinations and sources
                 and parallelizes communication and computation tasks.
                 We also present a method to apply static analysis and
                 dynamic emulation for efficient communication buffer
                 allocation to further reduce synchronization cost and
                 increase processor utilization. The existing cyclic
                 dependency in the mapped model may hinder the
                 effectiveness of the two techniques. We further propose
                 a set of optimizations involving repartition with
                 strongly connected threads to maximize the degree of
                 communication reduction and preprocessing strategies
                 with available delays in the model to reduce the number
                 of communication channels that cannot be optimized.
                 Experimental results demonstrate the advantages of the
                 proposed optimizations with 11--143\% throughput
                 improvement.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Mathew:2015:NMB,
author = "Jimson Mathew and Rajat Subhra Chakraborty and Durga
Prasad Sahoo and Yuanfan Yang and Dhiraj K. Pradhan",
title = "A Novel Memristor-Based Hardware Security Primitive",
journal = j-TECS,
volume = "14",
number = "3",
pages = "60:1--60:??",
month = may,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2736285",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Apr 21 17:21:32 MDT 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Memristor is an exciting new addition to the
repertoire of fundamental circuit elements.
Alternatives to many security protocols originally
employing traditional mathematical cryptography involve
novel hardware security primitives, such as Physically
Unclonable Functions (PUFs). In this article, we
propose a novel hybrid memristor-CMOS PUF circuit and
demonstrate its suitability through extensive
simulations of environmental and process variation
effects. The proposed PUF circuit has substantially
less hardware overhead than previously proposed
memristor-based PUF circuits while being inherently
resistant to machine learning-based modeling attacks
because of challenge-dependent delays of the memristor
stages. The proposed PUF can be conveniently used in
many security applications and protocols based on
hardware-intrinsic security.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "60",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2015:EBD,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Big Data, {Internet of Things},
                 Cybersecurity --- A New Trinity of Embedded Systems
                 Research",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "61:1--61:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2820608",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Barkaoui:2015:GES,
  author =       "Kamel Barkaoui and Luca Bernardinello and Andrey
                 Mokhov",
  title =        "Guest Editorial for Special Issue Application of
                 Concurrency to System Design",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "62:1--62:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2809925",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Furbach:2015:MMA,
  author =       "Florian Furbach and Roland Meyer and Klaus Schneider
                 and Maximilian Senftleben",
  title =        "Memory-Model-Aware Testing: a Unified Complexity
                 Analysis",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "63:1--63:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2753761",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "To improve the performance of the memory system,
                 multiprocessors implement weak memory consistency
                 models. Weak memory models admit different views of the
                 processes on their load and store instructions, thus
                 allowing for computations that are not sequentially
                 consistent. Program analyses have to take into account
                 the memory model of the targeted hardware. This is
                 challenging because numerous memory models have been
                 developed, and every memory model requires its own
                 analysis. In this article, we study a prominent
                 approach to program analysis: testing. The testing
                 problem takes as input sequences of operations, one for
                 each process in the concurrent program. The task is to
                 check whether these sequences can be interleaved to an
                 execution of the entire program that respects the
                 constraints of a memory model under consideration. We
                 determine the complexity of the testing problem for
                 most of the known memory models. Moreover, we study the
                 impact on the complexity of parameters, such as the
                 number of concurrent processes, the length of their
                 executions, and the number of shared variables. What
                 differentiates our contribution from related results is
                 a uniform approach that avoids considering each memory
                 model on its own. We build upon work of Steinke and
                 Nutt. They showed that the existing memory models form
                 a hierarchy where one model is called weaker than
                 another one if it includes the latter's behavior. Using
                 the Steinke-Nutt hierarchy, we develop three general
                 concepts that allow us to quickly determine the
                 complexity of a testing problem. First, we generalize
                 the technique of problem reductions from complexity
                 theory. So-called range reductions propagate hardness
                 results between memory models, and we apply them to
                 establish NP lower bounds for the stronger memory
                 models. Second, for the weaker models, we present
                 polynomial-time testing algorithms that are inspired by
                 determinization algorithms for automata. Finally, we
                 describe a single SAT encoding of the testing problem
                 that works for all memory models in the Steinke-Nutt
                 hierarchy to prove their membership in NP. Our results
                 are general enough to carry over to future weak memory
                 models. Moreover, they show that SAT solvers are
                 adequate tools for testing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Knapik:2015:ASB,
  author =       "Michal Knapik and Artur Meski and Wojciech Penczek",
  title =        "Action Synthesis for Branching Time Logic: Theory and
                 Applications",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "64:1--64:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2746337",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The article introduces a parametric extension of
                 Action-Restricted Computation Tree Logic called
                 pmARCTL. A symbolic fixed-point algorithm providing a
                 solution to the exhaustive parameter synthesis problem
                 is proposed. The parametric approach allows for an
                 in-depth system analysis and synthesis of the correct
                 parameter values. The time complexity of the problem
                 and the algorithm is provided. An existential fragment
                 of pmARCTL (pmEARCTL) is identified, in which all of
                 the solutions can be generated from a minimal and
                 unique base. A method for computing this base using
                 symbolic methods is provided. The prototype tool
                 SPATULA implementing the algorithm is applied to the
                 analysis of three benchmarks: faulty
                 Train-Gate-Controller, Peterson's mutual exclusion
                 protocol, and a generic pipeline processing network.
                 The experimental results show efficiency and
                 scalability of our approach compared to the naive
                 solution to the problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Siirtola:2015:PMI,
author = "Antti Siirtola and Keijo Heljanko",
title = "Parametrised Modal Interface Automata",
journal = j-TECS,
volume = "14",
number = "4",
pages = "65:1--65:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2776892",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Interface theories (ITs) enable us to analyse the
compatibility of interfaces and refine them while
preserving their compatibility. However, most ITs are
for finite state interfaces, whereas computing systems
are often parametrised involving components, the number
of which cannot be fixed. We present, to our knowledge,
the first IT that allows us to specify a parametric
number of interfaces. Moreover, we provide a fully
algorithmic procedure, implemented in a tool, for
checking the compatibility of and refinement between
parametrised interfaces. Finally, we show that the
restrictions of the technique are necessary; removing
any of them renders the refinement checking problem
undecidable.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "65",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cotard:2015:SHR,
author = "Sylvain Cotard and Audrey Queudet and Jean-Luc
B{\'e}chennec and S{\'e}bastien Faucou and Yvon
Trinquet",
title = "{STM-HRT}: a Robust and Wait-Free {STM} for Hard
Real-Time Multicore Embedded Systems",
journal = j-TECS,
volume = "14",
number = "4",
pages = "66:1--66:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2786979",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article introduces STM-HRT, a nonblocking
wait-free software transactional memory (STM) for hard
real-time (HRT) multicore embedded systems. Resource
access control in HRT systems is usually implemented
with lock-based synchronization. However, these
mechanisms may lead to deadlocks or starvations and do
not scale well with the number of cores. Most existing
nonblocking STM are not suitable for HRT systems,
because it is not possible to find an upper bound of
the execution time for each task. In this article, we
show how STM-HRT can be a robust solution for resource
sharing in HRT multicore systems. We provide a detailed
description of STM-HRT architecture. We propose a set
of arguments to establish the functional correctness of
its concurrency control protocol. Finally, as part of a
real-time analysis, we derive upper bounds on the
computations required to access shared data under
STM-HRT.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "66",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bujtor:2015:FSM,
author = "Ferenc Bujtor and Walter Vogler",
title = "Failure Semantics for Modal Transition Systems",
journal = j-TECS,
volume = "14",
number = "4",
pages = "67:1--67:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2746336",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With the aim to preserve deadlock freedom, we define a
new refinement preorder for modal transition systems
(MTSs), using an MTS-specific variant of testing
inspired by De Nicola and Hennessy. We characterize
this refinement with a kind of failure semantics and
show that it ``supports itself,'' for example, in the
sense of thoroughness---in contrast to standard modal
refinements. We present a conjunction operator with
respect to our new refinement, which is quite different
from existing ones. It always returns an MTS---again in
contrast to the case of modal refinement. Finally, we
also consider De Nicola's and Hennessy's may- and
must-testing, where the latter leads to a semantics
that is also compositional for hiding.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{DeGroote:2015:IAC,
author = "Robert {De Groote} and Philip K. F. H{\"o}lzenspies
and Jan Kuper and Gerard J. M. Smit",
title = "Incremental Analysis of Cyclo-Static Synchronous
Dataflow Graphs",
journal = j-TECS,
volume = "14",
number = "4",
pages = "68:1--68:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2792981",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we present a mathematical
characterisation of admissible schedules of
cyclo-static dataflow (csdf) graphs. We demonstrate how
algebraic manipulation of this characterization is
related to unfolding csdf actors and how this
manipulation allows csdf graphs to be transformed into
mrsdf graphs that are equivalent, in the sense that
they admit the same set of schedules. The presented
transformation allows the rich set of existing analysis
techniques for mrsdf graphs to be applied to csdf
graphs and generalizes the well-known transformations
from csdf and mrsdf into hsdf. Moreover, it gives rise
to an incremental approach to the analysis of csdf
graphs, where approximate analyses are combined with
exact transformations. We show the applicability of
this incremental approach by demonstrating its
effectiveness on the problem of optimizing buffer sizes
under a throughput constraint.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "68",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Germanos:2015:DUW,
author = "Vasileios Germanos and Stefan Haar and Victor Khomenko
and Stefan Schwoon",
title = "Diagnosability under Weak Fairness",
journal = j-TECS,
volume = "14",
number = "4",
pages = "69:1--69:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2832910",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In partially observed Petri nets, diagnosis is the
task of detecting whether the given sequence of
observed labels indicates that some unobservable fault
has occurred. Diagnosability is an associated property
of the Petri net, stating that in any possible
execution, an occurrence of a fault can eventually be
diagnosed. In this article, we consider diagnosability
under the weak fairness (WF) assumption, which
intuitively states that no transition from a given set
can stay enabled forever---it must eventually either fire
or be disabled. We show that a previous approach to
WF-diagnosability in the literature has a major flaw
and present a corrected notion. Moreover, we present an
efficient method for verifying WF-diagnosability based
on a reduction to LTL-X model checking. An important
advantage of this method is that the LTL-X formula is
fixed---in particular, the WF assumption does not have to
be expressed as a part of it (which would make the
formula length proportional to the size of the
specification), but rather the ability of existing
model checkers to handle weak fairness directly is
exploited.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "69",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pan:2015:SGP,
author = "Gung-Yu Pan and Jed Yang and Jing-Yang Jou and
Bo-Cheng Charles Lai",
title = "Scalable Global Power Management Policy Based on
Combinatorial Optimization for Multiprocessors",
journal = j-TECS,
volume = "14",
number = "4",
pages = "70:1--70:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2811404",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Multiprocessors have become the main architecture
trend in modern systems due to the superior
performance; nevertheless, the power consumption
remains a critical challenge. Global power management
(GPM) aims at dynamically finding the power state
combination that satisfies the power budget constraint
while maximizing the overall performance (or vice
versa). Due to the increasing number of cores in a
multiprocessor system, the scalability of GPM policies
has become critical when searching satisfactory state
combinations within acceptable time. This article
proposes a highly scalable policy based on
combinatorial optimization with theoretical proofs,
whereas previous works take exhaustive search or
heuristic methods. The proposed policy first applies an
optimum algorithm to construct a state combination
table in pseudo-polynomial time using dynamic
programming. Then, the state combination is assigned to
cores with minimum transition cost in linear time by
mapping to the network flow problem. Simulation results
show that the proposed policy achieves better system
performance for any given power budget when compared to
the state-of-the-art heuristic. Furthermore, the
proposed policy demonstrates its prominent scalability
with 125 times faster policy runtime for 512 cores.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "70",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lu:2015:ECA,
  author =       "Jing Lu and Ke Bai and Aviral Shrivastava",
  title =        "Efficient Code Assignment Techniques for Local Memory
                 on Software Managed Multicores",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "71:1--71:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2738039",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Scaling the memory hierarchy is a major challenge when
                 we scale the number of cores in a multicore processor.
                 Software Managed Multicore (SMM) architectures come up
                 as one of the promising solutions. In an SMM
                 architecture, there are no caches, and each core has
                 only a local scratchpad memory [Banakar et al. 2002].
                 As the local memory usually is small, large
                 applications cannot be directly executed on it. Code
                 and data of the task mapped to each core need to be
                 managed between global memory and local memory. This
                 article solves the problem of efficiently managing code
                 on an SMM architecture. The primary requirement of
                 generating efficient code assignments is a correct
                 management cost model. In this article, we address this
                 problem by proposing a cost calculation graph. In
                 addition, we develop two heuristics CMSM (Code Mapping
                 for Software Managed multicores) and CMSM\_advanced
                 that result in efficient code management execution on
                 the local scratchpad memory. Experimental results
                 collected after executing applications from the MiBench
                 suite [Guthaus et al. 2001] demonstrate that merely by
                 adopting the correct management cost calculation, even
                 using previous code assignment schemes, we can improve
                 performance by an average of 12\%. Combining the
                 correct management cost model and a more optimized code
                 mapping algorithm together, our heuristics can reduce
                 runtime in more than 80\% of the cases, and by up to
                 20\% on our set of benchmarks, compared to the
                 state-of-the-art code assignment approach [Jung et al.
                 2010]. When compared with Instruction-level Parallelism
                 (ILP) results, CMSM\_advanced performs an average of
                 5\% worse. We also simulate the benchmarks on a
                 cache-based system, and find that the code management
                 overhead on SMM core with our code management is much
                 less than memory latency of a cache-based system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Kamal:2015:OHC,
  author =       "Mehdi Kamal and Ali Afzali-Kusha and Saeed Safari and
                 Massoud Pedram",
  title =        "{OPLE}: a Heuristic Custom Instruction Selection
                 Algorithm Based on Partitioning and Local Exploration
                 of Application Dataflow Graphs",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "72:1--72:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2764458",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, a heuristic custom instruction (CI)
                 selection algorithm is presented. The proposed
                 algorithm, which is called OPLE for ``Optimization
                 based on Partitioning and Local Exploration,'' uses a
                 combination of greedy and optimal optimization methods.
                 It searches for the near-optimal solution by reducing
                 the search space based on partitioning the identified
                 CI set. The partitioning of the identified set
                 guarantees the success of the algorithm independent of
                 the size of the identified set. First, the algorithm
                 finds the near-optimal CIs from the candidate CIs for
                 each part. Next, the suggested CIs from different parts
                 are combined to determine the final selected CI set. To
                 improve the set of the selected CIs, the solution is
                 evolved by calling the algorithm iteratively. The
                 efficacy of the algorithm is assessed by comparing its
                 performance to those of optimal and nonoptimal methods.
                 A comparative study is performed for a number of
                 benchmarks under different area budgets and I/O
                 constraints. The results reveal higher speedups for the
                 OPLE algorithm, especially for larger identified
                 candidate sets and/or small area budgets compared to
                 those of the nonoptimal solutions. Compared to the
                 nonoptimal techniques, the proposed algorithm provides
                 30\% higher speedup improvement on average. The maximum
                 improvement is 117\%. The results also demonstrate that
                 in many cases OPLE is able to find the optimal
                 solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Palossi:2015:CDP,
  author =       "Daniele Palossi and Martino Ruggiero and Luca Benini",
  title =        "{$3$D} {CV} Descriptor on Parallel Heterogeneous
                 Platforms",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "73:1--73:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2733377",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded three-dimensional (3D) Computer Vision (CV)
                 is considered a technology enabler for future consumer
                 applications, attracting a wide interest in academia
                 and industry. However, 3D CV processing is a
                 computation-intensive task. Its high computational cost
                 is directly related to the processing of 3D point
                 clouds, with the 3D descriptor computation representing
                 one of the main bottlenecks. Understanding the main
                 computational challenges of 3D CV applications, as well
                 as the key characteristics, enabling features, and
                 limitations of current computing platforms, is clearly
                 strategic to identify the directions of evolution for
                 future embedded processing systems targeting 3D CV. In
                 this work, an innovative and complex 3D descriptor
                 (called SHOT) has been ported on a high-end and an
                 embedded computing platform. The high-end system is
                 composed by a high-performance Intel CPU coupled with a
                 Nvidia GPU. The embedded platform is, instead, composed
                 by an ARM-based processor, coupled with the STHORM
                 accelerator. STHORM is a many-core low-power
                 accelerator developed by ST Microelectronics, featuring
                 up to 64 computational units. The SHOT descriptor has
                 been parallelized using the OpenCL programming model
                 for both platforms. Finally, we have performed an
                 in-depth performance comparison and analysis between
                 general-purpose processors and accelerators in both
                 high-end and embedded domains, discussing and
                 highlighting the main differences in the
                 Hardware/Software (HW/SW) design methodologies and
                 approaches between high-end and embedded systems
                 targeting 3D CV applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Li:2015:CIB,
author = "Guohui Li and Yi Zhang and Jianjun Li",
title = "{Crenel}-Interval-Based Dynamic Power Management for
Periodic Real-Time Systems",
journal = j-TECS,
volume = "14",
number = "4",
pages = "74:1--74:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2744197",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In order to save the energy consumption of real-time
embedded systems, the integration of Dynamic Voltage
and Frequency Scaling (DVFS) and Device Power
Management (DPM) techniques has been well studied. In
this article, we propose a new energy management scheme
for periodic real-time tasks with implicit deadlines.
We mainly focus on the DPM part by presenting a novel
approach to the real-time DPM problem. Specifically, we
first identify intervals for each device, which we
refer to as Crenel Intervals, by partitioning the
Earliest Deadline First (EDF) schedule of the tasks
that need to access the device into successive
intervals. The principle for identifying Crenel
Intervals is that for each task, there is only one
deadline located in each Crenel Interval. Next,
targeting at a single device model and a multiple
device model, respectively, we propose the CI-EDF and
CI-EDF$^m$ algorithms to schedule task instances in
each Crenel Interval, so as to form long and continuous
slacks in each Crenel Interval but without jeopardizing
any task deadlines. Then, the slack in the Crenel
Intervals can be utilized to perform not only DPM, but
also DVFS. The experimental results show that our
approaches can achieve considerably more energy savings
than existing techniques with comparable quality.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mihajlovic:2015:AAR,
author = "Bojan Mihajlovi{\'c} and Zeljko Zili{\'c} and Warren
J. Gross",
title = "Architecture-Aware Real-Time Compression of Execution
Traces",
journal = j-TECS,
volume = "14",
number = "4",
pages = "75:1--75:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2766449",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In recent years, on-chip trace generation has been
recognized as a solution to the debugging of
increasingly complex software. An execution trace can
be seen as the most fundamentally useful type of trace,
allowing the execution path of software to be
determined post hoc. However, the bandwidth required to
output such a trace can be excessive. Our
architecture-aware trace compression (AATC) scheme adds
an on-chip branch predictor and branch target buffer to
reduce the volume of execution trace data in real time
through on-chip compression. Novel redundancy reduction
strategies are employed, most notably in exploiting the
widespread use of linked branches and the
compiler-driven movement of return addresses between
link register, stack, and program counter. In doing so,
the volume of branch target addresses is reduced by
52\%, whereas other algorithmic improvements further
decrease trace volume. An analysis of spatial and
temporal redundancy in the trace stream allows a
comparison of encoding strategies to be made for
systematically increasing compression performance. A
combination of differential, Fibonacci, VarLen, and
Move-to-Front encodings are chosen to produce two
compressor variants: a performance-focused xAATC that
encodes 56.5 instructions/bit using 24,133 gates and an
area-efficient fAATC that encodes 48.1 instructions/bit
using only 9,854 gates.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "75",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bak:2015:SPD,
author = "Stanley Bak and Zhenqi Huang and Fardin Abdi Taghi
Abad and Marco Caccamo",
title = "Safety and Progress for Distributed Cyber-Physical
Systems with Unreliable Communication",
journal = j-TECS,
volume = "14",
number = "4",
pages = "76:1--76:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2739046",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cyber-physical systems (CPSs) may interact and
manipulate objects in the physical world, and therefore
formal guarantees about their behavior are strongly
desired. Static-time proofs of safety invariants,
however, may be intractable for systems with
distributed physical-world interactions. This is
further complicated when realistic communication models
are considered, for which there may not be bounds on
message delays, or even when considering that messages
will eventually reach their destination. In this work,
we address the challenge of proving safety and progress
in distributed CPSs communicating over an unreliable
communication layer. We show that for this type of
communication model, system safety is closely related
to the results of a hybrid system's reachability
computation, which can be computed at runtime. However,
since computing reachability at runtime may be
computationally intensive, we provide an approach that
moves significant parts of the computation to design
time. This approach is demonstrated with a case study
of a simulation of multiple vehicles moving within a
shared environment.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "76",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Catania:2015:PSR,
author = "Vincenzo Catania and Andrea Araldo and Davide Patti",
title = "Parameter Space Representation of {Pareto} Front to
Explore Hardware--Software Dependencies",
journal = j-TECS,
volume = "14",
number = "4",
pages = "77:1--77:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2764457",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Embedded systems design requires conflicting
objectives to be optimized with an appropriate choice
of hardware-software parameters. A simulation campaign
can guide the design in finding the best trade-offs,
but due to the big number of possible configurations,
it is often infeasible to simulate them all. For these
reasons, design space exploration algorithms aim at
finding near-optimal system configurations by
simulating only a subset of them. In this work, we
present PS, a new multiobjective optimization
algorithm, and evaluate it in the context of the
embedded system design. The basic idea is to recognize
interesting regions --- that is, regions of the
configuration space that provide better configurations
with respect to other ones. PS evaluates more
configurations in the interesting regions while less
thoroughly exploring the rest of the configuration
space. After a detailed formal description of the
algorithm and the underlying concepts, we show a case
study involving the hardware/software exploration of a
VLIW architecture. Qualitative and quantitative
comparisons of PS against a well-known multiobjective
genetic approach demonstrate that while not
outperforming it in terms of Pareto dominance, the
proposed approach can balance the uniformity and
granularity qualities of the solutions found, obtaining
more extended Pareto fronts that provide a wider view
of the potentiality of the designed device. Therefore,
PS represents a further valid choice for the designer
when objective constraints allow it.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "77",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Matthews:2015:PTS,
author = "Adam Matthews and Stanislav Bobovych and Nilanjan
Banerjee and James P. Parkerson and Ryan Robucci and
Chintan Patel",
title = "{Perpetuu}: a Tiered Solar-powered {GIS} Microserver",
journal = j-TECS,
volume = "14",
number = "4",
pages = "78:1--78:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2767128",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The aftermath of a natural disaster is characterized
by lack of a reliable medium for dissemination of
information to survivors. The state-of-the-art
emergency response systems rely on satellite
radio-enabled devices, but survivors, unlike first
responders, do not have access to such devices. To
mitigate this problem, we present perpetuu, a
solar-powered portable GIS microserver. The microserver
node can be deployed in a disaster scene and can serve
maps to survivors viewable on browsers of off-the-shelf
mobile systems. The perpetuu nodes can form a wireless
mesh to cover a large geographic region. A key
innovation in the design of the perpetuu node is a
tiered software and hardware architecture --- the
system combines a low-power micro-controller with a
high-power micro-processor to provide a large spectrum
of power states. perpetuu stays in its lowest power
state most of the time, and it can in-vitro detect
survivors using Wi-Fi sensing, and consequently wake up
the higher-power tier to disseminate high-resolution
maps on standard web browsers that provide directions
to safe locations. The tiered design leverages
hardware-assisted energy measurements and a wakeup
controller to balance energy harvested from solar
panels with energy consumed by the system. We evaluate
perpetuu using measurements from our prototype and
trace-based simulations, and show that it can function
near-perpetually while serving maps to a large number
of survivors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "78",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Medhat:2015:RMC,
author = "Ramy Medhat and Borzoo Bonakdarpour and Deepak Kumar
and Sebastian Fischmeister",
title = "Runtime Monitoring of Cyber-Physical Systems Under
Timing and Memory Constraints",
journal = j-TECS,
volume = "14",
number = "4",
pages = "79:1--79:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2744196",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The goal of runtime monitoring is to inspect the
well-being of a system by employing a monitor process
that reads the state of the system during execution and
evaluates a set of properties expressed in some
specification language. The main challenge in runtime
monitoring is dealing with the costs imposed in terms
of resource utilization. In the context of
cyber-physical systems, it is crucial for a software
monitoring solution to be time predictable to improve
scheduling, as well as support composition of
monitoring solutions with an overall predictable
behavior. Moreover, a small memory footprint is often
required in components of cyber-physical systems,
especially in deeply embedded systems. In this article,
we propose a novel control-theoretic software
monitoring solution for coordinating time
predictability and memory utilization in runtime
monitoring of systems that interact with the physical
world. The controllers attempt to reduce monitoring
jitter and maximize memory utilization while
simultaneously ensuring the soundness of evaluation of
properties. For systems where multiple properties are
required to be monitored simultaneously, we construct a
buffer sharing mechanism in which controllers
dynamically share the memory space to negate the effect
of bursts of environment actions, thus reducing jitter
due to transient high loads. To validate our design
choices, we present three case studies: (1) a Bluetooth
mobile payment system, which shows a sporadic rate of
events during peak hours; (2) a laser beam stabilizer
for target tracking, and (3) a monitoring system for
air/fuel ratio in a car engine exhaust and the CAM
inlet position in the engine's cylinders. The
experimental results of the case studies demonstrate up
to 40\% improvement in time predictability of the
monitoring solution when compared to a basic
event-triggered approach. Moreover, memory utilization
reaches an average of 90\% when using our dynamic
buffer resizing mechanism.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gebotys:2015:SWP,
author = "Catherine H. Gebotys and Brian A. White",
title = "A Sliding Window Phase-Only Correlation Method for
Side-Channel Alignment in a {Smartphone}",
journal = j-TECS,
volume = "14",
number = "4",
pages = "80:1--80:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2783441",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Future wireless embedded devices will be increasingly
powerful, supporting many more applications including
one of the most crucial, security. Although many
embedded devices offer resistance to bus probing
attacks due to their compact size and high levels of
integration, susceptibility to attacks on their
electromagnetic side channel must be analyzed. This
side channel is often quite complex to analyze due to
the complexities of the embedded device including
operating system, interrupts, and so forth. This
article presents a new methodology for analyzing a
complex system's vulnerability to the EM side channel.
The methodology proposes a sliding window phase-only
correlation method for aligning electromagnetic
emanations from a complex smartphone running native
code utilizing an on-chip cache. Unlike previous
research, experimental results demonstrate that data
written to on-chip cache within an advanced 312 MHz
0.13 $\mu$m processor executing AES can be attacked
utilizing this new methodology. Furthermore, for the
first time, it has been shown that the point of
side-channel attack is not a spike of increased EM but
an area of low EM amplitude, unlike what is noted in
previous findings. This research is important for
advancing side-channel analysis understanding in
complex embedded processors and ensuring secure
implementations in future embedded ubiquitous
devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "80",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhao:2015:RSP,
author = "Qingling Zhao and Zonghua Gu and Haibo Zeng",
title = "Resource Synchronization and Preemption Thresholds
Within Mixed-Criticality Scheduling",
journal = j-TECS,
volume = "14",
number = "4",
pages = "81:1--81:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2783440",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In a mixed-criticality system, multiple tasks with
different levels of criticality may coexist on the same
hardware platform. The scheduling algorithm EDF-VD
(Earliest Deadline First with Virtual Deadlines) has
been proposed for mixed-criticality systems, which
assumes tasks do not share any common resources. We
present MC-SRP (Mixed-Criticality Stack Resource
Policy), a resource synchronization protocol for
EDF-VD, which allows resource sharing among tasks at
the same criticality level and guarantees that each
task is blocked at most once in each criticality mode.
In addition, we present MC-SRPT (MC-SRP with
Thresholds) for reducing the application stack size
requirement in resource-constrained embedded systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "81",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2015:SDE,
author = "Ming-Ju Wu and Chun-Jen Tsai",
title = "A Storage Device Emulator for System Performance
Evaluation",
journal = j-TECS,
volume = "14",
number = "4",
pages = "82:1--82:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2785969",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The performance and characteristics of the storage
devices used in embedded systems can have a great
influence on the overall end user experience. When
building embedded systems or designing new storage
device components, it is important for the designers to
be able to evaluate how storage devices of different
characteristics will affect the overall system
performance. Storage device emulation enables a
system's performance to be evaluated with simulated
storage devices that are not yet available. In storage
device emulation, the emulated storage device appears
to the operating system (OS) as a real storage device
and its service timings are determined by a disk model,
which simulates the behavior of the target storage
device. In the conventional storage device emulators,
because the OS is running continuously in the real-time
domain, the amount of time that the emulators can spend
on processing each I/O request is limited by the
service time of each corresponding I/O request. This
timing constraint can make emulating high-speed storage
devices a challenge for the conventional storage device
emulators. In this article, we propose an OS state
pausing approach to storage device emulation that can
overcome the timing constraints faced by the
conventional storage device emulators. By pausing the
state of the OS while the storage device emulator is
busy, the proposed emulator can spend as much time as
it needs for processing each I/O request without
affecting the performance of the emulated storage
device as perceived by the OS. This allows the proposed
storage device emulator to emulate storage devices that
would otherwise be challenging or even impossible for
the conventional storage device emulators. In addition,
the main task of storage device emulation is offloaded
to an external computer to minimize the impact of the
emulation workload on the target machine. The proposed
storage device emulator is implemented with the Linux
OS$^1$ on an embedded system development board.
Experimental results show that the full-system
performance benchmarks measured with the proposed
storage device emulator are within 2\% differences
compared to the results of the reference system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "82",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mirzoyan:2015:MNG,
author = "Davit Mirzoyan and Benny Akesson and Sander Stuijk and
Kees Goossens",
title = "Maximizing the Number of Good Dies for Streaming
Applications in {NoC}-Based {MPSoCs} Under Process
Variation",
journal = j-TECS,
volume = "14",
number = "4",
pages = "83:1--83:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2785968",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Scaling CMOS technology into nanometer feature-size
nodes has made it practically impossible to precisely
control the manufacturing process. This results in
variation in the speed and power consumption of a
circuit. As a solution to process-induced variations,
circuits are conventionally implemented with
conservative design margins to guarantee the target
frequency of each hardware component in manufactured
multiprocessor chips. This approach, referred to as
worst-case design, results in a considerable circuit
upsizing, in turn reducing the number of dies on a
wafer. This work deals with the design of real-time
systems for streaming applications (e.g., video
decoders) constrained by a throughput requirement
(e.g., frames per second) with reduced design margins,
referred to as better-than-worst-case design. To this
end, the first contribution of this work is a complete
modeling framework that captures a streaming
application mapped to an NoC-based multiprocessor
system with voltage-frequency islands under
process-induced die-to-die and within-die frequency
variations. The framework is used to analyze the impact
of variations in the frequency of hardware components
on application throughput at the system level. The
second contribution of this work is a methodology to
use the proposed framework and estimate the impact of
reducing circuit design margins on the number of good
dies that satisfy the throughput requirement of a
real-time streaming application. We show on both
synthetic and real applications that the proposed
better-than-worst-case design approach can increase the
number of good dies by up to 9.6\% and 18.8\% for
designs with and without fixed SRAM and IO blocks,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "83",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2015:CDR,
author = "Shiwen Zhang and Qingquan Zhang and Sheng Xiao and
Ting Zhu and Yu Gu and Yaping Lin",
title = "Cooperative Data Reduction in Wireless Sensor
Network",
journal = j-TECS,
volume = "14",
number = "4",
pages = "84:1--84:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2786755",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In wireless sensor networks, owing to the limited
energy of the sensor node, it is very meaningful to
propose a dynamic scheduling scheme with data
management that reduces energy as soon as possible.
However, traditional techniques treat data management
as an isolated process on only selected individual
nodes. In this article, we propose an aggressive data
reduction architecture, which is based on error control
within sensor segments and integrates three parallel
dynamic control mechanisms. We demonstrate that this
architecture not only achieves energy savings but also
guarantees the data accuracy specified by the
application. Furthermore, based on this architecture,
we propose two implementations. The experimental
results show that both implementations can raise the
energy savings while keeping the error at a predefined
and acceptable level. We observed that, compared with
the basic implementation, the enhancement
implementation achieves a relatively higher data
accuracy. Moreover, the enhancement implementation is
more suitable for the harsh environmental monitoring
applications. Further, when both implementations
achieve the same accuracy, the enhancement
implementation saves more energy. Extensive experiments
on realistic historical soil temperature data confirm
the efficacy and efficiency of two implementations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "84",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Scheir:2015:ASC,
author = "Marijn Scheir and Josep Balasch and Alfredo Rial and
Bart Preneel and Ingrid Verbauwhede",
title = "Anonymous Split {E}-Cash --- Toward Mobile Anonymous
Payments",
journal = j-TECS,
volume = "14",
number = "4",
pages = "85:1--85:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2783439",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Anonymous E-Cash was first introduced in 1982 as a
digital, privacy-preserving alternative to physical
cash. A lot of research has since then been devoted to
extend and improve its properties, leading to the
appearance of multiple schemes. Despite this progress,
the practical feasibility of E-Cash systems is still
today an open question. Payment tokens are typically
portable hardware devices in smart card form, resource
constrained due to their size, and therefore not suited
to support largely complex protocols such as E-Cash.
Migrating to more powerful mobile platforms, for
instance, smartphones, seems a natural alternative.
However, this implies moving computations from trusted
and dedicated execution environments to generic
multiapplication platforms, which may result in
security vulnerabilities. In this work, we propose a
new anonymous E-Cash system to overcome this
limitation. Motivated by existing payment schemes based
on MTM (Mobile Trusted Module) architectures, we
consider at design time a model in which user payment
tokens are composed of two modules: an untrusted but
powerful execution platform (e.g., smartphone) and a
trusted but constrained platform (e.g., secure
element). We show how the protocol's computational
complexity can be relaxed by a secure split of
computations: nonsensitive operations are delegated to
the powerful platform, while sensitive computations are
kept in a secure environment. We provide a full
construction of our proposed Anonymous Split E-Cash
scheme and show that it fully complies with the main
properties of an ideal E-Cash system. Finally, we test
its performance by implementing it on an Android
smartphone equipped with a Java-Card-compatible secure
element.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "85",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jiang:2015:AEB,
author = "Jian-Min Jiang and Huibiao Zhu and Qin Li and Yongxin
Zhao and Lin Zhao and Shi Zhang and Ping Gong and Zhong
Hong",
title = "Analyzing Event-Based Scheduling in Concurrent
Reactive Systems",
journal = j-TECS,
volume = "14",
number = "4",
pages = "86:1--86:??",
month = dec,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2783438",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Dec 8 17:53:22 MST 2015",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The traditional research on scheduling focuses on task
scheduling and schedulability analysis in concurrent
reactive systems. In this article, we dedicate
ourselves to event-based scheduling. We first formally
define an event-based scheduling policy and propose the
notion of the correctness of a scheduling policy in
terms of weak termination. Then we investigate the
correctness of the decomposition of scheduling controls
and finally obtain a decentralized scheduling method.
The method can automatically decompose the scheduling
policies of a concurrent reactive system into atomic
scheduling policies. Every atomic scheduling policy
corresponds to one subsystem. Each of the subsystems is
a completely independent system, which may be developed
and deployed independently. An experiment demonstrates
these results that may help engineers to design correct
and efficient schedule policies for a concurrent
reactive system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mandal:2016:DIW,
author = "Kalikinkar Mandal and Xinxin Fan and Guang Gong",
title = "Design and Implementation of {Warbler} Family of
Lightweight Pseudorandom Number Generators for Smart
Devices",
journal = j-TECS,
volume = "15",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2808230",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/prng.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With the advent of ubiquitous computing and the
Internet of Things (IoT), the security and privacy
issues for various smart devices such as
radio-frequency identification (RFID) tags and wireless
sensor nodes are receiving increased attention from
academia and industry. A number of lightweight
cryptographic primitives have been proposed to provide
security services for resource-constrained smart
devices. As one of the core primitives, a
cryptographically secure pseudorandom number generator
(PRNG) plays an important role for lightweight embedded
applications. The most existing PRNGs proposed for
smart devices employ true random number generators as a
component, which generally incur significant power
consumption and gate count in hardware. In this
article, we present Warbler family, a new pseudorandom
number generator family based on nonlinear feedback
shift registers (NLFSRs) with desirable randomness
properties. The design of the Warbler family is based
on the combination of modified de Bruijn blocks
together with a nonlinear feedback Welch-Gong (WG)
sequence generator, which enables us to precisely
characterize the randomness properties and to flexibly
adjust the security level of the resulting PRNG. Some
criteria for selecting parameters of the Warbler family
are proposed to offer the maximum level of security.
Two instances of the Warbler family are also described,
which feature two different security levels and are
dedicated to EPC C1 Gen2 RFID tags and wireless sensor
nodes, respectively. The security analysis shows that
the proposed instances not only can pass the
cryptographic statistical tests recommended by the EPC
C1 Gen2 standard and NIST but also are resistant to the
cryptanalytic attacks such as algebraic attacks, cube
attacks, time-memory-data tradeoff attacks,
Mihaljevi{\'c} et al.'s attacks, and weak internal
state and fault injection attacks. Our ASIC
implementations using a 65nm CMOS process demonstrate
that the proposed two lightweight instances of the
Warbler family can achieve good performance in terms of
speed and area and provide ideal solutions for securing
low-cost smart devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Poddar:2016:DHP,
author = "Soumyajit Poddar and Prasun Ghosal and Hafizur
Rahaman",
title = "Design of a High-Performance {CDMA}-Based
Broadcast-Free Photonic Multi-Core Network on Chip",
journal = j-TECS,
volume = "15",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2839301",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Present-day focus on multicore research has not only
increased computing power but also power- and
bandwidth-efficient communication among cores. On-chip
communication networks have become popular today
because of their low energy use and modular structure
compared to bus-based interconnects. Silicon photonics
has further boosted the performance of on-chip
interconnection networks with its low energy-delay
product and high reliability. In current multicore
Network-on-Chip (NoC) architectures, photonics is
playing an important role in transferring large volumes
of data both on- and off-chip. The problem addressed in
this work is the issue of broadcast traffic arising due
to invalidation requests from on-chip cache memories.
Although such traffic is typically less than 1\% of
total traffic, it can easily present a high load on
network resources, creating congestion and degrading
performance. In this article, we propose a CDMA-based,
secure, scalable, and energy-efficient technique to
eliminate broadcast invalidations and increase overall
performance. Experimental results indicate a
performance boost up to 22.2\% over a competing
Photonic NoC and up to 57.4\% over Electrical
Mesh-based NoC when the proposed technique is used.
Additional hardware deployed has an area overhead of
less than 1\%, whereas total energy consumed is at par
with other state-of-the-art techniques.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Santini:2016:BCS,
author = "Thiago Santini and Paolo Rech and Gabriel Luca Nazar
and Fl{\'a}vio Rech Wagner",
title = "Beyond Cross-Section: Spatio-Temporal Reliability
Analysis",
journal = j-TECS,
volume = "15",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2794148",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A computational system employed in safety-critical
applications typically has reliability as a primary
concern. Thus, the designer focuses on minimizing the
device radiation-sensitive area, often leading to
performance degradation. In this article, we present a
mathematical model to evaluate system reliability in
spatial (i.e., radiation-sensitive area) and temporal
(i.e., performance) terms and prove that minimizing
radiation-sensitive area does not necessarily maximize
application reliability. To support our claim, we
present an empirical counterexample where application
reliability is improved even if the radiation-sensitive
area of the device is increased. An extensive radiation
test campaign using a 28 nm commercial-off-the-shelf
ARM-based SoC was conducted, and experimental results
demonstrate that, while executing the considered
application at military aircraft altitude, the
probability of executing a two-year mission workload
without failures is increased by 5.85\% if L1 caches
are enabled (thus increasing the radiation-sensitive
area) when compared to no cache level being enabled.
However, if both L1 and L2 caches are enabled, the
probability is decreased by 31.59\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gebotys:2016:PCP,
author = "Catherine H. Gebotys and Brian A. White and Edgar
Mateos",
title = "Preaveraging and Carry Propagate Approaches to
Side-Channel Analysis of {HMAC-SHA256}",
journal = j-TECS,
volume = "15",
number = "1",
pages = "4:1--4:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2794093",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Although HMAC-SHA has been standardized for over a
decade, few published attacks on the single-cycle round
implementation exist. In this research, new attack
techniques are provided, for the first time, (1) to
help to discriminate between values of secret
intermediate variables within HMAC and (2) to reduce
the large word size complexity. Preaveraging and carry
propagate techniques are proposed using chosen
plaintexts and shown to significantly reduce the
complexity and runtimes for side-channel analysis of an
Altera FPGA platform. This research is important for
advancing side channel analysis of complex embedded
ASICs and ensuring secure implementations in future
embedded ubiquitous devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dong:2016:DLD,
author = "Wei Dong and Luyao Luo and Chao Huang",
title = "Dynamic Logging with Dylog in Networked Embedded
Systems",
journal = j-TECS,
volume = "15",
number = "1",
pages = "5:1--5:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2807698",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Event logging is an important technique for networked
embedded systems like wireless sensor networks. It can
greatly help developers to understand complex system
behaviors and diagnose program bugs. Existing logging
facilities do not well satisfy three practical
requirements: flexibility, efficiency, and high
synchronization accuracy. To simultaneously satisfy
these requirements, we present Dylog, a dynamic logging
facility for networked embedded systems. Dylog employs
several techniques. First, Dylog uses binary
instrumentation for dynamically inserting or removing
logging statements, enabling flexible and interactive
debugging at runtime. Second, Dylog incorporates an
efficient storage system and log collection protocol
for recording and transferring the logging messages.
Third, Dylog employs a lightweight data-driven approach
for reconstructing the synchronized time of the logging
messages. Dylog uses MAC-layer timestamping and drift
compensation to achieve high synchronization accuracy.
We implement Dylog on the TinyOS 2.1.1/TelosB platform.
Results show the following: (1) Dylog incurs a small
overhead. Indirections in Dylog incur an additional
execution overhead of less than 1\%. Dylog reduces the
logging storage size by approximately 50\% compared
with the standard TinyOS radio printf library. Dylog
reduces the patch size by more than 90\%, compared with
incremental reprogramming. (2) Dylog reduces the
synchronization overhead by 78\% in terms of
transmission cost, compared with a traditional time
synchronization protocol, FTSP, and it can achieve a
high time synchronization accuracy of 5.4 $ \mu $s.
(3) Dylog can help diagnose system problems effectively
at the source-code level for three real-world
scenarios.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jiang:2016:PAD,
author = "Ke Jiang and Petru Eles and Zebo Peng",
title = "Power-Aware Design Techniques of Secure Multimode
Embedded Systems",
journal = j-TECS,
volume = "15",
number = "1",
pages = "6:1--6:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2801152",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Nowadays, embedded systems have been widely used in
all types of application areas, some of which belong to
the safety and reliability critical domains. The
functional correctness and design robustness of the
embedded systems involved in such domains are crucial
for the safety of personal/enterprise property or even
human lives. Thereby, a holistic design procedure that
considers all the important design concerns is
essential. In this article, we approach embedded
systems design from an integral perspective. We
consider not only the classic real-time and quality of
service requirements, but also the emerging security
and power efficiency demands. Modern embedded systems
are not any more developed for a fixed purpose, but
instead designed for undertaking various processing
requests. This leads to the concept of multimode
embedded systems, in which the number and nature of
active tasks change during runtime. Under dynamic
situations, providing high performance along with
various design concerns becomes a really difficult
problem. Therefore, we propose a novel power-aware
secure embedded systems design framework that
efficiently solves the problem of runtime quality
optimization with security and power constraints. The
efficiency of our proposed techniques is evaluated in
extensive experiments.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bambagini:2016:EAS,
author = "Mario Bambagini and Mauro Marinoni and Hakan Aydin and
Giorgio Buttazzo",
title = "Energy-Aware Scheduling for Real-Time Systems: a
Survey",
journal = j-TECS,
volume = "15",
number = "1",
pages = "7:1--7:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2808231",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a survey of energy-aware
scheduling algorithms proposed for real-time systems.
The analysis presents the main results starting from
the middle 1990s until today, showing how the proposed
solutions evolved to address the evolution of the
platform's features and needs. The survey first
presents a taxonomy to classify the existing approaches
for uniprocessor systems, distinguishing them according
to the technology exploited for reducing energy
consumption, that is, Dynamic Voltage and Frequency
Scaling (DVFS), Dynamic Power Management (DPM), or
both. Then, the survey discusses the approaches
proposed in the literature to deal with the additional
problems related to the evolution of computing
platforms toward multicore architectures.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Thomas:2016:EDP,
author = "Anna Thomas and Karthik Pattabiraman",
title = "Error Detector Placement for Soft Computing
Applications",
journal = j-TECS,
volume = "15",
number = "1",
pages = "8:1--8:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2801154",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The scaling of Silicon devices has exacerbated the
unreliability of modern computer systems, and power
constraints have necessitated the involvement of
software in hardware error detection. At the same time,
emerging workloads in the form of soft computing
applications (e.g., multimedia applications) can
tolerate most hardware errors as long as the erroneous
outputs do not deviate significantly from error-free
outcomes. We term outcomes that deviate significantly
from the error-free outcomes as Egregious Data
Corruptions (EDCs). In this study, we propose a
technique to place detectors for selectively detecting
EDC-causing errors in an application. We performed an
initial study to formulate heuristics that identify
EDC-causing data. Based on these heuristics, we
developed an algorithm that identifies program
locations for placing high coverage detectors for EDCs
using static analysis. Our technique achieves an
average EDC coverage of 82\%, under performance
overheads of 10\%, while detecting 10\% of the Non-EDC
and benign faults. We also evaluate the error
resilience of these applications under the 14 compiler
optimizations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Corre:2016:TTB,
author = "Youenn Corre and Jean-Philippe Diguet and Dominique
Heller and Dominique Blouin and Lo{\"\i}c Lagadec",
title = "{TBES}: Template-Based Exploration and Synthesis of
Heterogeneous Multiprocessor Architectures on {FPGA}",
journal = j-TECS,
volume = "15",
number = "1",
pages = "9:1--9:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2816817",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article describes TBES, a software end-to-end
environment for synthesizing multitask applications on
FPGAs. The implementation follows a template-based
approach for creating heterogeneous multiprocessor
architectures. Heterogeneity stems from the use of
general-purpose processors along with custom
accelerators. Experimental results demonstrate
substantial speedup for several classes of
applications. Furthermore, this work allows for
reducing development costs and saving development time
for the software architect, the domain expert, and the
optimization expert. This work provides a framework to
bring together various existing tools and optimisation
algorithms. The advantages are manifold: modularity and
flexibility, easy customization for best-fit algorithm
selection, durability and evolution over time, and
legacy preservation including domain experts' know-how.
In addition to the use of architecture templates for
the overall system, a second contribution lies in using
high-level synthesis for promoting exploration of
hardware IPs. The domain expert, who best knows which
tasks are good candidates for hardware implementation,
selects parts of the initial application to be
potentially synthesized as dedicated accelerators. As a
consequence, the HLS general problem turns into a
constrained and more tractable issue, and automation
capabilities eliminate the need for tedious and
error-prone manual processes during domain space
exploration. The automation only takes place once the
application has been broken down into concurrent tasks
by the designer, who can then drive the synthesis
process with a set of parameters provided by TBES to
balance tradeoffs between optimization efforts and
quality of results. The approach is demonstrated step
by step up to FPGA implementations and executions with
an MJPEG benchmark and a complex Viola-Jones face
detection application. We show that TBES allows one to
achieve results with up to 10 times speedup to reduce
development times and to widen design space
exploration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chatterjee:2016:TAD,
author = "Urbi Chatterjee and Rajat Subhra Chakraborty and
Hitesh Kapoor and Debdeep Mukhopadhyay",
title = "Theory and Application of Delay Constraints in Arbiter
{PUF}",
journal = j-TECS,
volume = "15",
number = "1",
pages = "10:1--10:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2815621",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Physically Unclonable Function (PUF) circuits are
often vulnerable to mathematical model-building
attacks. We theoretically quantify the advantage
provided to an adversary by any training dataset
expansion technique along the lines of security
analysis of cryptographic hash functions. We present an
algorithm to enumerate certain sets of delay
constraints for the widely studied Arbiter PUF (APUF)
circuit, then demonstrate how these delay constraints
can be utilized to expand the set of known
Challenge--Response Pairs (CRPs), thus facilitating
model-building attacks. We provide experimental results
for Field Programmable Gate Array (FPGA)--based APUF to
establish the effectiveness of the proposed attack.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "10",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kwon:2016:CBF,
author = "Se Jin Kwon",
title = "A Cache-Based Flash Translation Layer for {TLC}-Based
Multimedia Storage Devices",
journal = j-TECS,
volume = "15",
number = "1",
pages = "11:1--11:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2820614",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Current triple-level cell (TLC)-based solid-state
drives used in multimedia storage devices support
multichannel access to increase capacity and
throughput. Unfortunately, current state-of-the-art FTL
algorithms must employ selective caching for inquiring
about the address mapping information, which causes low
space utilization, a large flash memory requirement,
and performance degradation. In this article, the
\underline{Ca}che-\underline{b}ased
Flash Translation Layer (Cab-FTL) is proposed for
TLC-based multimedia storage devices. Cab-FTL enhances
the read and write performances by achieving high space
utilization while reducing the size of the mapping
tables to 1.68\% compared to DFTL. Despite a caching of
the mapping tables in DRAM, Cab-FTL achieves a fast
system boot using its fast wake-up mechanism.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2016:EPC,
author = "Sheng-Min Huang and Li-Pin Chang",
title = "Exploiting Page Correlations for Write Buffering in
Page-Mapping Multichannel {SSDs}",
journal = j-TECS,
volume = "15",
number = "1",
pages = "12:1--12:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2815622",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Advanced solid-state disks (SSDs) have been equipped
with page-mapping flash translation layers and
multichannel architectures. The SSDs employ a RAM-based
write buffer, which delays write requests for reducing
write traffic, reorders requests for mitigating
garbage-collection overhead, and produces parallel page
writes for improving channel time utilization. This
work presents a novel write buffer algorithm that
exploits temporal and spatial correlations among buffer
pages. The write buffer groups temporally or spatially
correlated buffer pages and then writes the grouped
buffer pages to the same flash block. In this way, when
the correlated page data are updated in the future,
flash blocks will receive bulk page invalidations and
become good candidates for garbage collection. With
multichannel architectures, the write buffer adaptively
disperses read-most sequential data over channels for
high page-level parallelism of sequential reads, while
clustering write-most sequential data in the same
channel for a reduced cost of garbage collection. We
evaluated the proposed method and previously proposed
buffer algorithms. Our method was shown to outperform
the existing methods by up to 134\%. We also
implemented our buffer design on the OpenSSD platform;
the time and space overheads of our design were
reported to be very low.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2016:SGA,
author = "Li-Pin Chang and Yu-Syun Liu and Wen-Huei Lin",
title = "Stable Greedy: Adaptive Garbage Collection for Durable
Page-Mapping Multichannel {SSDs}",
journal = j-TECS,
volume = "15",
number = "1",
pages = "13:1--13:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2820613",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Commodity solid state drives (SSDs) have recently
begun involving the adoption of powerful controllers
for multichannel flash management at the page level.
However, many of these models still use primitive
garbage-collection algorithms, because previous
approaches are subject to poor scalability with
high-capacity flash memory. This study presents Stable
Greedy for garbage collection in page-mapping
multichannel SSDs. Stable Greedy identifies
page-accurate data hotness using block-level
information, and jointly considers block space
utilization and block stability for victim selection.
Its design considers flash wear leveling for SSD
lifetime enhancement at the block level as well as at
the channel level. Stable Greedy runs at a constant
time, and requires limited RAM space. The simulation
results revealed that Stable Greedy outperformed
previous methods considerably under various workloads
and multichannel architectures. Stable Greedy was
successfully implemented on the OpenSSD platform, and
the actual performance measurements were consistent
with the simulation results.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sun:2016:FFJ,
author = "Jinghao Sun and Nan Guan and Yang Wang and Qingxu Deng
and Peng Zeng and Wang Yi",
title = "Feasibility of Fork-Join Real-Time Task Graph Models:
Hardness and Algorithms",
journal = j-TECS,
volume = "15",
number = "1",
pages = "14:1--14:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2809780",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In the formal analysis of real-time systems, modeling
of branching codes and modeling of intratask
parallelism structures are two of the most important
research topics. These two real-time properties are
combined, resulting in the fork-join real-time task
(FJRT) model, which extends the digraph-based task
model with forking and joining semantics. We prove that
the EDF schedulability problem on a preemptive
uniprocessor for the FJRT model is coNP-hard in the
strong sense, even if the utilization of the task
system is bounded by a constant strictly less than 1.
Then, we show that the problem becomes tractable with
some slight structural restrictions on parallel
sections, for which we propose an exact schedulability
test with pseudo-polynomial time complexity. Our
results thus establish a borderline between the
tractable and intractable FJRT models.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{DiPietro:2016:CLD,
author = "Roberto {Di Pietro} and Flavio Lombardi and Antonio
Villani",
title = "{CUDA} Leaks: a Detailed Hack for {CUDA} and a
(Partial) Fix",
journal = j-TECS,
volume = "15",
number = "1",
pages = "15:1--15:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2801153",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Graphics processing units (GPUs) are increasingly
common on desktops, servers, and embedded platforms. In
this article, we report on new security issues related
to CUDA, which is the most widespread platform for GPU
computing. In particular, details and proofs-of-concept
are provided about novel vulnerabilities to which CUDA
architectures are subject. We show how such
vulnerabilities can be exploited to cause severe
information leakage. As a case study, we experimentally
show how to exploit one of these vulnerabilities on a
GPU implementation of the AES encryption algorithm.
Finally, we also suggest software patches and
alternative approaches to tackle the presented
vulnerabilities.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhu:2016:SDW,
author = "Zhenhuan Zhu and S. Olutunde Oyadiji",
title = "Structure Design of Wireless Sensor Nodes with Energy
and Cost Awareness for Multichannel Signal
Measurement",
journal = j-TECS,
volume = "15",
number = "1",
pages = "16:1--16:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2790300",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article aims to develop a design pattern of a
wireless sensor node working in multichannel signal
measurement for effectively lowering energy consumption
and cost. The proposed design pattern enables the
architecture of a wireless sensor node to adapt to
application requirements, thus to significantly reduce
system redundancy. Two multisensor structures are
parameterized regarding frequency response, power
consumption, and cost. The system design pattern
provides flexibility through three proposed interface
circuits that bridge between multisensor structures and
the microprocessors inside sensor nodes. It also allows
adjusting the time delay parameter that can enlarge the
selection range of main electronic components, and
thereby increases the robustness of the model for
practical implementations. A virtual case study is
provided to demonstrate how to apply this model into an
application design.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hsiu:2016:UCS,
author = "Pi-Cheng Hsiu and Po-Hsien Tseng and Wei-Ming Chen and
Chin-Chiang Pan and Tei-Wei Kuo",
title = "User-Centric Scheduling and Governing on Mobile
Devices with {big.LITTLE} Processors",
journal = j-TECS,
volume = "15",
number = "1",
pages = "17:1--17:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2829946",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Mobile applications will become progressively more
complicated and diverse. Heterogeneous computing
architectures like big.LITTLE are a hardware solution
that allows mobile devices to combine computing
performance and energy efficiency. However, software
solutions that conform to the paradigm of conventional
fair scheduling and governing are not applicable to
mobile systems, thereby degrading user experience or
reducing energy efficiency. In this article, we exploit
the concept of application sensitivity, which reflects
the user's attention on each application, and devise a
user-centric scheduler and governor that allocate
computing resources to applications according to their
sensitivity. Furthermore, we integrate our design into
the Android operating system. The results of
experiments conducted on a commercial big.LITTLE
smartphone with real-world mobile apps demonstrate that
the proposed design can achieve significant gains in
energy efficiency while improving the quality of user
experience.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sharma:2016:DFT,
author = "Namita Sharma and Preeti Ranjan Panda and Francky
Catthoor and Min Li and Prashant Agrawal",
title = "Data Flow Transformation for Energy-Efficient
Implementation of {Givens} Rotation-Based {QRD}",
journal = j-TECS,
volume = "15",
number = "1",
pages = "18:1--18:??",
month = feb,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2837025",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "QR decomposition (QRD), a matrix decomposition
algorithm widely used in embedded application domain,
can be realized in a large number of valid processing
sequences that differ significantly in the number of
memory accesses and computations, and hence the overall
implementation energy. With modern low-power embedded
processors evolving toward register files with wide
memory interfaces and vector functional units (FUs),
data flow in these algorithms needs to be carefully
devised to efficiently utilize the costly wide memory
accesses and the vector FUs. In this article, we
present an energy-efficient data flow transformation
strategy for the Givens rotation-based QRD.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Emeretlis:2016:LBB,
  author = {Andreas Emeretlis and George Theodoridis and
    Panayiotis Alefragis and Nikolaos Voros},
  title = {A {Logic-Based Benders} Decomposition Approach for
    Mapping Applications on Heterogeneous Multicore
    Platforms},
  journal = j-TECS,
  volume = 15,
  number = 1,
  pages = {19:1--19:??},
  month = feb,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2838733},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The development of efficient methods for mapping
    applications on heterogeneous multicore platforms is a
    key issue in the field of embedded systems. In this
    article, a novel approach based on the Logic-Based
    Benders decomposition principle is introduced for
    mapping complex applications on these platforms, aiming
    at optimizing their execution time. To provide optimal
    solutions for this problem in a short time, a new
    hybrid model that combines Integer Linear Programming
    (ILP) and Constraint Programming (CP) models is
    introduced. Also, to reduce the complexity of the model
    and its solution time, a set of novel techniques for
    generating additional constraints called Benders cuts
    is proposed. An extensive set of experiments has been
    performed in which synthetic applications described by
    Directed Acyclic Graphs (DAGs) were mapped to a number
    of heterogeneous multicore platforms. Moreover,
    experiments with DAGs that correspond to two real-life
    applications have also been performed. Based on the
    experimental results, it is proven that the proposed
    approach outperforms the pure ILP model in terms of the
    solution time and quality of the solution.
    Specifically, the proposed approach is able to find an
    optimal solution within a time limit of 2 hours in the
    vast majority of performed experiments, while the pure
    ILP model fails. Also, for the cases where both methods
    fail to find an optimal solution within the time limit,
    the solution of the proposed approach is systematically
    better than the solution of the ILP model.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {19},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ko:2016:SBS,
  author = {Yohan Ko and Jihoon Kang and Jongwon Lee and Yongjoo
    Kim and Joonhyun Kim and Hwisoo So and Kyoungwoo Lee
    and Yunheung Paek},
  title = {Software-Based Selective Validation Techniques for
    Robust {CGRAs} Against Soft Errors},
  journal = j-TECS,
  volume = 15,
  number = 1,
  pages = {20:1--20:??},
  month = feb,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2843943},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Coarse-Grained Reconfigurable Architectures (CGRAs)
    are drawing significant attention since they promise
    both performances with parallelism and flexibility with
    reconfiguration. Soft errors (or transient faults) are
    becoming a serious design concern in embedded systems
    including CGRAs since the soft error rate is increasing
    exponentially as technology is scaling. A recently
    proposed software-based technique with TMR (Triple
    Modular Redundancy) implemented on CGRAs incurs extreme
    overheads in terms of runtime and energy consumption
    mainly due to expensive voting mechanisms for the
    outputs from the triplication of every operation. In
    this article, we propose selective validation
    mechanisms for efficient modular redundancy techniques
    in the datapaths on CGRAs. Our techniques selectively
    validate the results at synchronous operations rather
    than every operation in order to reduce the expensive
    performance overhead from the validation mechanism. We
    also present an optimization technique to further
    improve the runtime and the energy consumption by
    minimizing synchronous operations where a validating
    mechanism needs to be applied. Our experimental results
    demonstrate that our selective validation-based TMR
    technique with our optimization on CGRAs can improve
    the runtime by 41.0\% and the energy consumption by
    26.2\% on average over benchmarks as compared to the
    recently proposed software-based TMR technique with the
    full validation.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {20},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ivanov:2016:ARS,
  author = {Radoslav Ivanov and Miroslav Pajic and Insup Lee},
  title = {Attack-Resilient Sensor Fusion for Safety-Critical
    Cyber-Physical Systems},
  journal = j-TECS,
  volume = 15,
  number = 1,
  pages = {21:1--21:??},
  month = feb,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2847418},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article focuses on the design of safe and
    attack-resilient Cyber-Physical Systems (CPS) equipped
    with multiple sensors measuring the same physical
    variable. A malicious attacker may be able to disrupt
    system performance through compromising a subset of
    these sensors. Consequently, we develop a precise and
    resilient sensor fusion algorithm that combines the
    data received from all sensors by taking into account
    their specified precisions. In particular, we note that
    in the presence of a shared bus, in which messages are
    broadcast to all nodes in the network, the attacker's
    impact depends on what sensors he has seen before
    sending the corrupted measurements. Therefore, we
    explore the effects of communication schedules on the
    performance of sensor fusion and provide theoretical
    and experimental results advocating for the use of the
    Ascending schedule, which orders sensor transmissions
    according to their precision starting from the most
    precise. In addition, to improve the accuracy of the
    sensor fusion algorithm, we consider the dynamics of
    the system in order to incorporate past measurements at
    the current time. Possible ways of mapping sensor
    measurement history are investigated in the article and
    are compared in terms of the confidence in the final
    output of the sensor fusion. We show that the precision
    of the algorithm using history is never worse than the
    no-history one, while the benefits may be significant.
    Furthermore, we utilize the complementary properties of
    the two methods and show that their combination results
    in a more precise and resilient algorithm. Finally, we
    validate our approach in simulation and experiments on
    a real unmanned ground robot.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {21},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Shukla:2016:ESB,
author = "Sandeep K. Shukla",
title = "Editorial: Science of the Big and Small and Embedded
Computing Systems",
journal = j-TECS,
volume = "15",
number = "2",
pages = "21e:1--21e:??",
month = may,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2901293",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21e",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dutt:2016:TSE,
  author = {Nikil Dutt and Axel Jantsch and Santanu Sarma},
  title = {Toward Smart Embedded Systems: a Self-aware
    System-on-Chip {(SoC)} Perspective},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {22:1--22:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2872936},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Embedded systems must address a multitude of
    potentially conflicting design constraints such as
    resiliency, energy, heat, cost, performance, security,
    etc., all in the face of highly dynamic operational
    behaviors and environmental conditions. By
    incorporating elements of intelligence, the hope is
    that the resulting ``smart'' embedded systems will
    function correctly and within desired constraints in
    spite of highly dynamic changes in the applications and
    the environment, as well as in the underlying
    software/hardware platforms. Since terms related to
    ``smartness'' (e.g., self-awareness, self-adaptivity,
    and autonomy) have been used loosely in many software
    and hardware computing contexts, we first present a
    taxonomy of ``self-x'' terms and use this taxonomy to
    relate major ``smart'' software and hardware computing
    efforts. A major attribute for smart embedded systems
    is the notion of self-awareness that enables an
    embedded system to monitor its own state and behavior,
    as well as the external environment, so as to adapt
    intelligently. Toward this end, we use a System-on-Chip
    perspective to show how the CyberPhysical
    System-on-Chip (CPSoC) exemplar platform achieves
    self-awareness through a combination of cross-layer
    sensing, actuation, self-aware adaptations, and online
    learning. We conclude with some thoughts on open
    challenges and research directions.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {22},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Vinco:2016:ESI,
author = "Sara Vinco and Christian Pilato",
title = "Editorial: Special Issue on Innovative Design Methods
for Smart Embedded Systems",
journal = j-TECS,
volume = "15",
number = "2",
pages = "22e:1--22e:??",
month = may,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2885505",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22e",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Oneto:2016:LHF,
  author = {Luca Oneto and Sandro Ridella and Davide Anguita},
  title = {Learning Hardware-Friendly Classifiers Through
    Algorithmic Stability},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {23:1--23:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2836165},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Most state-of-the-art machine-learning (ML) algorithms
    do not consider the computational constraints of
    implementing the learned model on embedded devices.
    These constraints are, for example, the limited depth
    of the arithmetic unit, the memory availability, or the
    battery capacity. We propose a new learning framework,
    the Algorithmic Risk Minimization (ARM), which relies
    on Algorithmic-Stability, and includes these
    constraints inside the learning process itself. ARM
    allows one to train advanced resource-sparing ML models
    and to efficiently deploy them on smart embedded
    systems. Finally, we show the advantages of our
    proposal on a smartphone-based Human Activity
    Recognition application by comparing it to a
    conventional ML approach.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {23},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Das:2016:AHR,
author = "Anup Das and Bashir M. Al-Hashimi and Geoff V.
Merrett",
title = "Adaptive and Hierarchical Runtime Manager for
Energy-Aware Thermal Management of Embedded Systems",
journal = j-TECS,
volume = "15",
number = "2",
pages = "24:1--24:??",
month = may,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2834120",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modern embedded systems execute applications, which
interact with the operating system and hardware
differently depending on the type of workload. These
cross-layer interactions result in wide variations of
the chip-wide thermal profile. In this article, a
reinforcement learning-based runtime manager is
proposed that guarantees application-specific
performance requirements and controls the POSIX thread
allocation and voltage/frequency scaling for
energy-efficient thermal management. This controls
three thermal aspects: peak temperature, average
temperature, and thermal cycling. Contrary to existing
learning-based runtime approaches that optimize energy
and temperature individually, the proposed runtime
manager is the first approach to combine the two
objectives, simultaneously addressing all three thermal
aspects. However, determining thread allocation and
core frequencies to optimize energy and temperature is
an NP-hard problem. This leads to exponential growth in
the learning table (significant memory overhead) and a
corresponding increase in the exploration time to learn
the most appropriate thread allocation and core
frequency for a particular application workload. To
confine the learning space and to minimize the learning
cost, the proposed runtime manager is implemented in a
two-stage hierarchy: a heuristic-based thread
allocation at a longer time interval to improve thermal
cycling, followed by a learning-based hardware
frequency selection at a much finer interval to improve
average temperature, peak temperature, and energy
consumption. This enables finer control on temperature
in an energy-efficient manner while simultaneously
addressing scalability, which is a crucial aspect for
multi-/many-core embedded systems. The proposed
hierarchical runtime manager is implemented for Linux
running on nVidia's Tegra SoC, featuring four ARM
Cortex-A15 cores. Experiments conducted with a range of
embedded and cpu-intensive applications demonstrate
that the proposed runtime manager not only reduces
energy consumption by an average 15\% with respect to
Linux but also improves all the thermal aspects---average
temperature by 14${}^\circ $C, peak temperature by
16${}^\circ $C, and thermal cycling by 54\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gu:2016:RTF,
  author = {Xiaoqi Gu and Yongxin Zhu and Shengyan Zhou and
    Chaojun Wang and Meikang Qiu and Guoxing Wang},
  title = {A Real-Time {FPGA-Based} Accelerator for {ECG}
    Analysis and Diagnosis Using Association-Rule Mining},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {25:1--25:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2821508},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Telemedicine provides health care services at a
    distance using information and communication
    technologies, which intends to be a solution to the
    challenges faced by current health care systems with
    growing numbers of population, increased demands from
    patients, and shortages in human resources. Recent
    advances in telemedicine, especially in wearable
    electrocardiogram (ECG) monitors, call for more
    intelligent and efficient automatic ECG analysis and
    diagnostic systems. We present a streaming architecture
    implemented on Field-Programmable Gate Arrays (FPGAs)
    to accelerate real-time ECG signal analysis and
    diagnosis in a pipelining and parallel way.
    Association-rule mining is employed to generate early
    diagnostic results by matching features of ECG with
    generated association rules. To improve performance of
    the processing, we propose a hardware-oriented
    data-mining algorithm named Bit\_Q\_Apriori. The
    corresponding hardware implementation indicates a good
    scalability and outperforms other hardware designs in
    terms of performance, throughput, and hardware cost.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {25},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Johnson:2016:RTR,
  author = {Taylor T. Johnson and Stanley Bak and Marco Caccamo
    and Lui Sha},
  title = {Real-Time Reachability for Verified Simplex Design},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {26:1--26:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2723871},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The Simplex architecture ensures the safe use of an
    unverifiable complex/smart controller by using it in
    conjunction with a verified safety controller and
    verified supervisory controller (switching logic). This
    architecture enables the safe use of smart,
    high-performance, untrusted, and complex control
    algorithms to enable autonomy without requiring the
    smart controllers to be formally verified or certified.
    Simplex incorporates a supervisory controller that will
    take over control from the unverified complex/smart
    controller if it misbehaves and use a safety
    controller. The supervisory controller should (1)
    guarantee that the system never enters an unsafe state
    (safety), but should also (2) use the complex/smart
    controller as much as possible (minimize conservatism).
    The problem of precisely and correctly defining the
    switching logic of the supervisory controller has
    previously been considered either using a
    control-theoretic optimization approach or through an
    offline hybrid-systems reachability computation. In
    this work, we show that a combined online/offline
    approach that uses aspects of the two earlier methods,
    along with a real-time reachability computation, also
    maintains safety, but with significantly less
    conservatism, allowing the complex controller to be
    used more frequently. We demonstrate the advantages of
    this unified approach on a saturated inverted pendulum
    system, in which the verifiable region of attraction is
    over twice as large compared to the earlier approach.
    Additionally, to validate the claims that the real-time
    reachability approach may be implemented on embedded
    platforms, we have ported and conducted embedded
    hardware studies using both ARM processors and Atmel
    AVR microcontrollers. This is the first ever
    demonstration of a hybrid-systems reachability
    computation in real time on actual embedded platforms,
    which required addressing significant technical
    challenges.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {26},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Baka:2016:NSS,
  author = {Maria-Iro Baka and Francky Catthoor and Dimitrios
    Soudris},
  title = {Near-Static Shading Exploration for Smart Photovoltaic
    Module Topologies Based on Snake-like Configurations},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {27:1--27:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2837026},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Variable shading due to clouds and nearby objects has
    a severe impact on the energy yield of photovoltaic
    installations. Due to the industry's standard of
    permanently series-connected cells in a photovoltaic
    (PV) module, partial shading creates mismatches between
    the Current-Voltage (I-V) characteristics of cells.
    This article proposes an alternative configurable
    intramodule cell interconnection topology whereby cell
    connections can be adapted during operation to allow an
    optimized power production. The proposed configurable
    topology outperforms significantly a conventional 10 $
    \times $ 6 module under heavy shade. Moreover, this is
    achieved in a quite flexible way and with negligible
    overhead under uniform irradiation conditions.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {27},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Malek:2016:RRQ,
author = "Alirad Malek and Ioannis Sourdis and Stavros Tzilis
and Yifan He and Gerard Rauwerda",
title = "{RQNoC}: a Resilient Quality-of-Service
Network-on-Chip with Service Redirection",
journal = j-TECS,
volume = "15",
number = "2",
pages = "28:1--28:??",
month = may,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2846097",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jun 8 09:43:30 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we describe RQNoC, a service-oriented
Network-on-Chip (NoC) resilient to permanent faults. We
characterize the network resources based on the
particular service that they support and, when faulty,
bypass them, allowing the respective traffic class to
be redirected. We propose two alternatives for service
redirection, each having different advantages and
disadvantages. The first one, Service Detour, uses
longer alternative paths through resources of the same
service to bypass faulty network parts, keeping traffic
classes isolated. The second approach, Service Merge,
uses resources of other services providing shorter
paths but allowing traffic classes to interfere with
each other. The remaining network resources that are
common for all services employ additional mechanisms
for tolerating faults. Links tolerate faults using
additional spare wires combined with a flit-shifting
mechanism, and the router control is protected with
Triple-Modular-Redundancy (TMR). The proposed RQNoC
network designs are implemented in 65nm technology and
evaluated in terms of performance, area, power
consumption, and fault tolerance. Service Detour
requires 9\% more area and consumes 7.3\% more power
compared to a baseline network, not tolerant to faults.
Its packet latency and throughput is close to the
fault-free performance at low-fault densities, but
fault tolerance and performance drop substantially for
8 or more network faults. Service Merge requires 22\%
more area and 27\% more power than the baseline and has
a 9\% slower clock. Compared to a fault-free network, a
Service Merge RQNoC with up to 32 faults has increased
packet latency up to 1.5 to 2.4$ \times $ and reduced
throughput to 70\% or 50\%. However, it delivers
substantially better fault tolerance, having a mean
network connectivity above 90\% even with 32 network
faults versus 41\% of a Service Detour network.
Combining Service Merge and Service Detour improves fault
tolerance, further sustaining a higher number of
network faults and reduced packet latency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ienne:2016:GES,
  author = {Paolo Ienne and Jean-Pierre Talpin},
  title = {Guest Editorial: Special Issue on Models and
    Methodologies for System Design},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {29:1--29:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2885503},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {29},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Desnos:2016:MRB,
  author = {Karol Desnos and Maxime Pelcat and Jean-Fran{\c{c}}ois
    Nezan and Slaheddine Aridhi},
  title = {On Memory Reuse Between Inputs and Outputs of Dataflow
    Actors},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {30:1--30:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2871744},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article introduces a new technique to minimize
    the memory footprints of Digital Signal Processing
    (DSP) applications specified with Synchronous Dataflow
    (SDF) graphs and implemented on shared-memory
    Multiprocessor System-on-Chip (MPSoCs). In addition to
    the SDF specification, which captures data dependencies
    between coarse-grained tasks called actors, the
    proposed technique relies on two optional inputs
    abstracting the internal data dependencies of actors:
    annotations of the ports of actors, and script-based
    specifications of merging opportunities between input
    and output buffers of actors. Experimental results on a
    set of applications show a reduction of the memory
    footprint by 48\% compared to state-of-the-art
    minimization techniques.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {30},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Nouri:2016:ARA,
  author = {Ayoub Nouri and Marius Bozga and Anca Molnos and Axel
    Legay and Saddek Bensalem},
  title = {{ASTROLABE}: a Rigorous Approach for System-Level
    Performance Modeling and Analysis},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {31:1--31:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2885498},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Building abstract system-level models that faithfully
    capture performance and functional behavior for
    embedded systems design is challenging. Unlike
    functional aspects, performance details are rarely
    available during the early design phases, and no clear
    method is known to characterize them. Moreover, once
    such models are built, they are inherently complex as
    they mix software models, hardware constraints, and
    environment abstractions. Their analysis by using
    traditional performance evaluation methods is reaching
    the limit. In this article, we present a systematic
    approach for building stochastic abstract performance
    models using statistical inference and model
    calibration, and we propose statistical model checking
    as a scalable performance evaluation technique for
    them.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {31},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Butt:2016:DPH,
  author = {Shahzad Ahmad Butt and Mehdi Roozmeh and Luciano
    Lavagno},
  title = {Designing Parameterizable Hardware {IPs} in a
    Model-Based Design Environment for High-Level
    Synthesis},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {32:1--32:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2871737},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Model-based hardware design allows one to map a single
    model to multiple hardware and/or software
    architectures, essentially eliminating one of the major
    limitations of manual coding in C or RTL. Model-based
    design for hardware implementation has traditionally
    offered a limited set of microarchitectures, which are
    typically suitable only for some application scenarios.
    In this article we illustrate how digital signal
    processing (DSP) algorithms can be modeled as flexible
    intellectual property blocks to be used within the
    popular Simulink model-based design environment. These
    blocks are written in C and are designed for both
    functional simulation and hardware implementation,
    including architectural design space exploration and
    hardware implementation through high-level synthesis. A
    key advantage of our modeling approach is that the very
    same bit-accurate model is used for simulation and
    high-level synthesis. To prove the feasibility of our
    proposed approach, we modeled a fast Fourier transform
    (FFT) algorithm and synthesized it for different DSP
    applications with very different performance and cost
    requirements. We also implemented a
    high-level-synthesis (HLS) intellectual property (IP)
    generator that can generate flexible FFT HLS-IP blocks
    that can be mapped to multiple
    micro-/macroarchitectures, to enable design space
    exploration as well as being used for functional
    simulation in the Simulink environment.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {32},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Le:2016:CBR,
  author = {Thi Thieu Hoa Le and Roberto Passerone and Uli
    Fahrenberg and Axel Legay},
  title = {Contract-Based Requirement Modularization via
    Synthesis of Correct Decompositions},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {33:1--33:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2885752},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In distributed development of modern systems,
    contracts play a vital role in ensuring
    interoperability of components and adherence to
    specifications. It is therefore often desirable to
    verify the satisfaction of an overall property
    represented as a contract, given the satisfaction of
    smaller properties also represented as contracts. When
    the verification result is negative, designers must
    face the issue of refining the subproperties and
    components. This is an instance of the classical
    synthesis problems: ``can we construct a model that
    satisfies some given specification?'' In this work, we
    propose two strategies enabling designers to synthesize
    or refine a set of contracts so that their composition
    satisfies a given contract. We develop a generic
    algebraic method and show how it can be applied in
    different contract models to support top-down
    component-based development of distributed systems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {33},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Rho:2016:GEC,
  author = {Seungmin Rho and Wenny Rahayu and Geyong Min},
  title = {Guest Editorial: Challenges of Embedded Systems as
    They Evolve into {M2M}, {Internet of Things}},
  journal = j-TECS,
  volume = 15,
  number = 2,
  pages = {34:1--34:??},
  month = may,
  year = 2016,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2886417},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {34},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Zeng:2016:SLM,
  author =       "Jing Zeng and Laurence T. Yang and Jianhua Ma",
  title =        "A System-Level Modeling and Design for
                 Cyber-Physical-Social Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "35:1--35:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2834119",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The design of cyber-physical-social systems (CPSS) is
                 a novel and challenging research field due that it
                 emphasizes the deep fusion of cyberspace, physical
                 space, and social space. In this article, we extend our
                 previously proposed system-level design framework [Zeng
                 et al. 2015] to tailor it to the needs of social
                 scenario of multiple users. A hierarchical Petri
                 net-based model and social flow are presented to extend
                 the control flow and formally describe the social
                 interactions of multiple users, respectively. By using
                 the extended model, the system-level optimization for
                 CPSS can be achieved by the improved design flow.
                 Specifically, object emplacement and user satisfaction
                 are further extended into the social environment. Also
                 maximal power estimation algorithm is improved,
                 leveraging the extended intermediate representation
                 model. Finally, we use a smart office case to
                 demonstrate the feasibility and effectiveness of our
                 improved design approach for multiple users.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2016:IRW,
  author =       "Daqiang Zhang and Jiafu Wan and Zongjian He and
                 Shengjie Zhao and Ke Fan and Sang Oh Park and Zhibin
                 Jiang",
  title =        "Identifying Region-Wide Functions Using Urban Taxicab
                 Trajectories",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "36:1--36:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2821507",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the urban development and enlargement, various
                 regions such as residential zones and administrative
                 districts now appear as parts of cities. People exhibit
                 different mobility patterns in each region, which is
                 closely relevant to region-wide functions. In this
                 article, we propose a scheme to discover region-wide
                 functions using large-scale Shanghai taxicab
                 trajectories that capture enormous traces for more than
                 13,000 taxicabs over a period of about 3 years. We
                 investigate these taxicab trajectories and conduct an
                 extensive preliminary study. Then, we divide the city
                 into disjointed regions using Voronoi decomposition. By
                 incorporating people's pick-up and drop-off
                 information, we refine the Voronoi partitioning results
                 to identify region-wide functional areas. Finally, we
                 study people's movement frequency on weekdays and
                 weekends for every kind of urban functional regions. We
                 also look into human mobility within or across the
                 identified urban functional regions. Experimental
                 results show that human movement is bounded with the
                 function of urban regions, and more than 90\% of people
                 visit neighboring (less than 20km travel distance)
                 functional regions with high probability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ji:2016:CLO,
  author =       "Wen Ji and Bo-Wei Chen and Xiangdong Wang and Haiyong
                 Luo and Mucheol Kim and Yiqiang Chen",
  title =        "Cross-Layer Opportunistic Scheduling for
                 Device-to-Device Video Multicast Services",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "37:1--37:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2856034",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we address the problem of how to make
                 the wireless device-to-device (D2D) video multicast
                 systems have better quality provision with
                 consideration of internet-of-things (IoT) applications.
                 We propose an opportunistic transmission and fair
                 resource allocation framework, including joint
                 application-layer and physical-layer transmission and
                 optimization. First, we use a parallel subchannels
                 structure by concatenating the Fountain codes and
                 diversity-embedded space-time block codes to provide
                 reliable and flexible transmission in heterogeneous
                 circumstances. Second, we exploit the quality of
                 heterogeneous user experience (quality of experience)
                 metric under D2D video multicast systems, with
                 consideration of various channel states, device
                 capability, video content urgency, and the number of
                 demanding users. Third, we formulate reliable multiple
                 video streams broadcasting to heterogeneous devices as
                 an aggregate maximum utility achieving problem, and we
                 use opportunistic scheduling to select suitable users
                 in each transmission interval to improve the
                 broadcasting utility. Fourth, we use the utility fair
                 scheme to guide rate allocation among multicontent
                 video multicast. Extensive performance comparison and
                 analysis are presented to demonstrate efficiency of the
                 proposed solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2016:SMA,
  author =       "Lu Liu and Nick Antonopoulos and Minghui Zheng and
                 Yongzhao Zhan and Zhijun Ding",
  title =        "A Socioecological Model for Advanced Service Discovery
                 in Machine-to-Machine Communication Networks",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "38:1--38:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2811264",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The new development of embedded systems has the
                 potential to revolutionize our lives and will have a
                 significant impact on future Internet of Thing (IoT)
                 systems if required services can be automatically
                 discovered and accessed at runtime in
                 Machine-to-Machine (M2M) communication networks. It is
                 a crucial task for devices to perform timely service
                 discovery in a dynamic environment of IoTs. In this
                 article, we propose a Socioecological Service Discovery
                 (SESD) model for advanced service discovery in M2M
                 communication networks. In the SESD network, each
                 device can perform advanced service search to
                 dynamically resolve complex enquires and autonomously
                 support and co-operate with each other to quickly
                 discover and self-configure any services available in
                 M2M communication networks to deliver a real-time
                 capability. The proposed model has been systematically
                 evaluated and simulated in a dynamic M2M environment.
                 The experiment results show that SESD can self-adapt
                 and self-organize themselves in real time to generate
                 higher flexibility and adaptability and achieve a
                 better performance than the existing methods in terms
                 of the number of discovered service and a better
                 efficiency in terms of the number of discovered
                 services per message.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ahmad:2016:EMB,
  author =       "Awais Ahmad and Anand Paul and Mazhar Rathore and
                 Hangbae Chang",
  title =        "An Efficient Multidimensional Big Data Fusion Approach
                 in Machine-to-Machine Communication",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "39:1--39:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2834118",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Machine-to-Machine communication (M2M) is nowadays
                 increasingly becoming a world-wide network of
                 interconnected devices uniquely addressable, via
                 standard communication protocols. The prevalence of M2M
                 is bound to generate a massive volume of heterogeneous,
                 multisource, dynamic, and sparse data, which leads a
                 system towards major computational challenges, such as,
                 analysis, aggregation, and storage. Moreover, a
                 critical problem arises to extract the useful
                 information in an efficient manner from the massive
                 volume of data. Hence, to govern an adequate quality of
                 the analysis, diverse and capacious data needs to be
                 aggregated and fused. Therefore, it is imperative to
                 enhance the computational efficiency for fusing and
                 analyzing the massive volume of data. Therefore, to
                 address these issues, this article proposes an
                 efficient, multidimensional, big data analytical
                 architecture based on the fusion model. The basic
                 concept implicates the division of magnitudes
                 (attributes), i.e., big datasets with complex
                 magnitudes can be altered into smaller data subsets
                 using five levels of the fusion model that can be
                 easily processed by the Hadoop Processing Server,
                 resulting in formalizing the problem of feature
                 extraction applications using earth observatory system,
                 social networking, or networking applications.
                 Moreover, a four-layered network architecture is also
                 proposed that fulfills the basic requirements of the
                 analytical architecture. The feasibility and efficiency
                 of the proposed algorithms used in the fusion model are
                 implemented on Hadoop single-node setup on UBUNTU 14.04
                 LTS core i5 machine with 3.2GHz processor and 4GB
                 memory. The results show that the proposed system
                 architecture efficiently extracts various features
                 (such as land and sea) from the massive volume of
                 satellite data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2016:UMA,
  author =       "Eui-Jik Kim and Jung-Hyok Kwon and Ken Choi and
                 Taeshik Shon",
  title =        "Unified Medium Access Control Architecture for
                 Resource-Constrained Machine-to-Machine Devices",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "40:1--40:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2876958",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In capillary machine-to-machine (M2M) communications,
                 which is being considered as a feasible network
                 solution for M2M applications, because of physical
                 resource constraints and deployment conditions, an
                 energy-efficient and scalable medium access control
                 (MAC) protocol is crucial for numerous M2M devices to
                 concurrently access wireless channels. Therefore, this
                 paper presents a unified MAC layer architecture for
                 resource-constrained M2M devices in capillary M2M
                 networks [named as resource-constrained MAC
                 architecture (RCMA)], which has a unified (monolithic)
                 framework consisting of essential functional components
                 to support MAC-related operations of M2M devices:
                 multi-channel hybrid MAC (McHM), logical link control
                 (LLC), time synchronizer (TS), and device on--off
                 scheduler (DO2S). McHM provides a baseline MAC protocol
                 for an entire capillary M2M system that combines the
                 benefit of both contention-based carrier sense multiple
                 access and schedule-based time division multiple access
                 schemes, whereas the other three components help in the
                 McHM operations. To demonstrate the effectiveness of
                 the RCMA, we implement the whole stack using the
                 QualNet simulator. Experimental results show that the
                 RCMA outperforms the conventional ZigBee stack in terms
                 of energy efficiency and scalability, even under heavy
                 traffic conditions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Franchino:2016:BOE,
  author =       "Gianluca Franchino and Giorgio Buttazzo and Mauro
                 Marinoni",
  title =        "Bandwidth Optimization and Energy Management in
                 Real-Time Wireless Networks",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "41:1--41:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2851498",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In embedded systems operated by battery and
                 interacting with the environment, a fundamental issue
                 is the enforcement of real-time and energy constraints
                 to guarantee a desired lifetime with a given
                 performance. A lot of research has focused on energy
                 management at the communication level; however, not
                 many authors considered both real-time and energy
                 requirements in wireless communication systems. This
                 article proposes El-SMan, a power-aware framework
                 working in combination with MAC layer communication
                 protocols for maximizing battery lifetime in wireless
                 networks of embedded systems with real-time
                 constraints. Exploiting the flexibility in bandwidth
                 requirements, El-SMan adapts stream parameters to
                 balance performance versus energy consumption, taking
                 both lifetime and message deadlines into account.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% Editorial numbered 41e. The pages field carries the same article
%%% number as the articleno field, so that it does not collide with
%%% regular article 41 (pages 41:1--41:??) in the same issue.
@Article{Shukla:2016:EFI,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Fence Itself Grazing the Field --- Security
                 from the Sentries",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "41e:1--41e:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2953045",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2016:UUP,
  author =       "Yichuan Wang and Xin Liu and Cheng-Hsin Hsu",
  title =        "{UPDATE}: {User-Profile-Driven Adaptive TransfEr} for
                 Mobile Devices",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2889489",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Existing channel-aware scheduling work has mainly
                 focused on scheduling in small timescales, that is,
                 tens to hundreds of seconds. We propose to use
                 long-term user profiles to provide useful statistical
                 information on future network conditions in large
                 timescales. We design scheduling algorithms based on
                 Markov decision theory. We collect and use a large set
                 of real-life traces from the general public. Extensive
                 trace-driven evaluations show that many real mobile
                 users can benefit from our framework. In addition, we
                 compare our framework against state-of-the-art
                 algorithms and observe significant performance
                 differences because the existing algorithms were not
                 designed for the large timescale scenario.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Sotiriou-Xanthopoulos:2016:IEV,
  author =       "Efstathios Sotiriou-Xanthopoulos and Sotirios Xydis
                 and Kostas Siozios and George Economakos and Dimitrios
                 Soudris",
  title =        "An Integrated Exploration and Virtual Platform
                 Framework for Many-Accelerator Heterogeneous Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "43:1--43:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2866578",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The recent advent of many-accelerator systems-on-chip
                 (SoC), driven by the need for maximizing throughput and
                 power efficiency, has led to an exponential increase in
                 the hardware/software co-design complexity. The reason
                 of this increase is that the designer has to explore a
                 vast number of architectural parameter combinations for
                 each single accelerator, as well as inter-accelerator
                 configuration combinations under specific area,
                 throughput, and power constraints, given that each
                 accelerator has different computational requirements.
                 In such a case, the design space size explodes. Thus,
                 existing design space exploration (DSE) techniques give
                 poor-quality solutions, as the design space cannot be
                 adequately covered in a fair time. This problem is
                 aggravated by the very long simulation time of the
                 many-accelerator virtual platforms (VPs). This article
                 addresses these design issues by (a) presenting a
                 virtual prototyping solution that decreases the
                 exploration time by enabling the evaluation of multiple
                 configurations per VP simulation and (b) proposing a
                 DSE methodology that efficiently explores the design
                 space of many-accelerator systems. With the use of two
                 fully developed use cases, namely an H.264 decoding
                 server for multiple video streams and a parallelized
                 denoising system for MRI scans, we show that the
                 proposed DSE methodology either leads to Pareto points
                 that dominate over those of a typical DSE scenario or
                 finds new solutions that might not be found by the
                 typical DSE. In addition, the proposed virtual
                 prototyping solution leads to DSE runtime reduction
                 reaching 10 $ \times $ for H.264 and 5 $ \times $ for
                 Rician denoise.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% The algorithm name Chambolle (capitalised as a name in the abstract)
%%% is brace-protected in the title so that bibliography styles that
%%% down-case titles keep its capital, just as for the acronym FPGA.
@Article{Beretta:2016:PCA,
  author =       "Ivan Beretta and Vincenzo Rana and Abdulkadir Akin and
                 Alessandro Antonio Nacci and Donatella Sciuto and David
                 Atienza",
  title =        "Parallelizing the {Chambolle} Algorithm for
                 Performance-Optimized Mapping on {FPGA} Devices",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2851497",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The performance and the efficiency of recent computing
                 platforms have been deeply influenced by the widespread
                 adoption of hardware accelerators, such as graphics
                 processing units (GPUs) or field-programmable gate
                 arrays (FPGAs), which are often employed to support the
                 tasks of general-purpose processors (GPPs). One of the
                 main advantages of these accelerators over their
                 sequential counterparts (GPPs) is their ability to
                 perform massive parallel computation. However, to
                 exploit this competitive edge, it is necessary to
                 extract the parallelism from the target algorithm to be
                 executed, which generally is a very challenging task.
                 This concept is demonstrated, for instance, by the poor
                 performance achieved on relevant multimedia algorithms,
                 such as Chambolle, which is a well-known algorithm
                 employed for the optical flow estimation. The
                 implementations of this algorithm that can be found in
                 the state of the art are generally based on GPUs but
                 barely improve the performance that can be obtained
                 with a powerful GPP. In this article, we propose a
                 novel approach to extract the parallelism from
                 computation-intensive multimedia algorithms, which
                 includes an analysis of their dependency schema and an
                 assessment of their data reuse. We then perform a
                 thorough analysis of the Chambolle algorithm, providing
                 a formal proof of its inner data dependencies and
                 locality properties. Then, we exploit the
                 considerations drawn from this analysis by proposing an
                 architectural template that takes advantage of the
                 fine-grained parallelism of FPGA devices. Moreover,
                 since the proposed template can be instantiated with
                 different parameters, we also propose a design metric,
                 the expansion rate, to help the designer in the
                 estimation of the efficiency and performance of the
                 different instances, making it possible to select the
                 right one before the implementation phase. We finally
                 show, by means of experimental results, how the
                 proposed analysis and parallelization approach leads to
                 the design of efficient and high-performance FPGA-based
                 implementations that are orders of magnitude faster
                 than the state-of-the-art ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Nagar:2016:FPW,
  author =       "Kartik Nagar and Y. N. Srikant",
  title =        "Fast and Precise Worst-Case Interference Placement for
                 Shared Cache Analysis",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "45:1--45:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2854151",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Real-time systems require a safe and precise estimate
                 of the worst-case execution time (WCET) of programs. In
                 multicore architectures, the precision of a program's
                 WCET estimate highly depends on the precision of its
                 predicted shared cache behavior. Prediction of shared
                 cache behavior is difficult due to the uncertain timing
                 of interfering shared cache accesses made by programs
                 running on other cores. Given the assignment of
                 programs to cores, the worst-case interference
                 placement (WCIP) technique tries to find the worst-case
                 timing of interfering accesses, which would cause the
                 maximum number of cache misses on the worst case path
                 of the program, to determine its WCET. Although WCIP
                 generates highly precise WCET estimates, the current
                 ILP-based approach is also known to have very high
                 analysis time. In this work, we investigate the WCIP
                 problem in detail and determine its source of hardness.
                 We show that performing WCIP is an NP-hard problem by
                 reducing the 0-1 knapsack problem. We use this
                 observation to make simplifying assumptions, which make
                 the WCIP problem tractable, and we propose an
                 approximate greedy technique for WCIP, whose time
                 complexity is linear in the size of the program. We
                 perform extensive experiments to show that the
                 assumptions do not affect the precision of WCIP but
                 result in significant reduction of analysis time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Starke:2016:EDV,
  author =       "Renan Augusto Starke and Andreu Carminati and
                 R{\^o}mulo {Silva De Oliveira}",
  title =        "Evaluating the Design of a {VLIW} Processor for
                 Real-Time Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "46:1--46:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2889490",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Nowadays, many real-time applications are very complex
                 and as the complexity and the requirements of those
                 systems become more demanding, more hardware processing
                 capacity is necessary. Unfortunately, the correct
                 functioning of real-time systems depends not only on
                 the logically correct response but also on the time
                 when it is produced. General-purpose processor design
                 fails to deliver analyzability due to their
                 nondeterministic behavior caused by the use of cache
                 memories, dynamic branch prediction, speculative
                 execution, and out-of-order pipelines. In this article,
                 we investigate the pipeline performance of Very Long
                 Instruction Word (VLIW) architectures for real-time
                 systems with an in-order pipeline considering
                 Worst-Case Execution Time (WCET) performance.
                 Techniques on obtaining the WCET of VLIW machines are
                 also considered and we make a quantification on how
                 important are hardware techniques such as static branch
                 prediction, predication, and pipeline speed of complex
                 operations such as memory access and multiplication for
                 high-performance real-time systems. The memory
                 hierarchy is out of the scope of this article and we
                 used a classic deterministic structure formed by a
                 direct mapped instruction cache and a data scratchpad
                 memory. A VLIW prototype was implemented in VHDL from
                 scratch considering the HP VLIW ST231 ISA. We also show
                 some compiler insights and we use a representative
                 subset of the M{\"a}lardalen's WCET benchmarks for
                 validation and performance quantification.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2016:SMR,
  author =       "Sang-Hoon Kim and Jinkyu Jeong and Jin-Soo Kim and
                 Seungryoul Maeng",
  title =        "{SmartLMK}: a Memory Reclamation Scheme for Improving
                 User-Perceived App Launch Time",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "47:1--47:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2894755",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As the mobile computing environment evolves, users
                 demand high-quality apps and better user experience.
                 Consequently, memory demand in mobile devices has
                 soared. Device manufacturers have fulfilled the demand
                 by equipping devices with more RAM. However, such a
                 hardware approach is only a temporary solution and does
                 not scale well in the resource-constrained mobile
                 environment. Meanwhile, mobile systems adopt a new app
                 life cycle and a memory reclamation scheme tailored for
                 the life cycle. When a user leaves an app, the app is
                 not terminated but cached in memory as long as there is
                 enough free memory. If the free memory gets low, a
                 victim app is terminated and the associated memory to
                 the app is reclaimed. This process-level approach has
                 worked well in the mobile environment. However, user
                 experience can be impaired severely because the victim
                 selection policy does not consider the user experience.
                 In this article, we propose a novel memory reclamation
                 scheme called SmartLMK. SmartLMK minimizes the impact
                 of the process-level reclamation on user experience.
                 The worthiness to keep an app in memory is modeled by
                 means of user-perceived app launch time and app usage
                 statistics. The memory footprint and impending memory
                 demand are estimated from the history of the memory
                 usage. Using these values and memory models, SmartLMK
                 picks up the least valuable apps and terminates them at
                 once. Our evaluation on a real Android-based smartphone
                 shows that SmartLMK efficiently distinguishes the
                 valuable apps among cached apps and keeps those
                 valuable apps in memory. As a result, the
                 user-perceived app launch time can be improved by up to
                 13.2\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2016:APA,
  author =       "Dongwon Kim and Yohan Chon and Wonwoo Jung and Yungeun
                 Kim and Hojung Cha",
  title =        "Accurate Prediction of Available Battery Time for
                 Mobile Applications",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "48:1--48:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2875423",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy consumption in mobile devices is an important
                 issue for both system developers and users. Users are
                 aware of the battery-related information of their
                 mobile devices and tend to take appropriate actions to
                 increase the battery life. In this article, we propose
                 a framework that accurately estimates the remaining
                 battery time of applications at runtime. The framework
                 profiles the power behavior of applications tied with
                 activated hardware components and estimates the
                 remaining battery budget utilizing the battery-related
                 data provided by the device. The experiments validate
                 that our method predicts the remaining battery time for
                 applications with approximately 93\% of accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ahmed:2016:NSC,
  author =       "Rehan Ahmed and Parameswaran Ramanathan and Kewal K.
                 Saluja",
  title =        "Necessary and Sufficient Conditions for Thermal
                 Schedulability of Periodic Real-Time Tasks Under Fluid
                 Scheduling Model",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "49:1--49:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2883612",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the growing need to address the thermal issues in
                 modern processing platforms, various performance
                 throttling schemes have been proposed in literature
                 (DVFS, clock gating, and so on) to manage temperature.
                 In real-time systems, such methods are often
                 unacceptable, as they can result in potentially
                 catastrophic deadline misses. As a result, real-time
                 scheduling research has recently focused on developing
                 algorithms that meet the compute deadline while
                 satisfying power and thermal constraints. Basic bounds
                 that can determine if a set of tasks can be scheduled
                 or not were established in the 1970s based on
                 computation utilization. Similar results for thermal
                 bounds have not been forthcoming. In this article, we
                 address the problem of thermal constraint
                 schedulability of tasks and derive necessary and
                 sufficient conditions for thermal feasibility of
                 periodic tasksets on a unicore system. We prove that a
                 GPS-inspired fluid scheduling scheme is thermally
                 optimal when context switch/preemption overhead is
                 ignored. Extension of sufficient conditions to a
                 nonfluid model is still an open problem. We also extend
                 some of the results to a multicore processing
                 environment. We demonstrate the efficacy of our results
                 through extensive simulations. We also evaluate the
                 proposed concepts on a hardware testbed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Li:2016:USS,
author = "Fang Li and Jiafu Wan and Ping Zhang and Di Li and
Daqiang Zhang and Keliang Zhou",
title = "Usage-Specific Semantic Integration for Cyber-Physical
Robot Systems",
journal = j-TECS,
volume = "15",
number = "3",
pages = "50:1--50:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2873057",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The multidisciplinary nature and time criticality of
computing in Cyber-Physical Robot Systems (CPRS) makes
it significantly different from traditional computer
systems. This article attempts to create a
usage-specific language called Cyber-Physical Robot
Language (CPRL), which supports the CPRS design and
implementation in an integrative and swift way.
Multiview description and integration strategies as
well as formal execution semantics for usage-specific
simulation and verification are outlined. A graphic
unified environment for CPRS modeling is supplied, in
which several tools are integrated. A 6-DOF distributed
robot system development in the environment is
presented. The approach is an attempt to support CPRS
design in an effective way, at the same time
guaranteeing the system function and performance
requirements.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "50",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{An:2016:MBD,
author = "Xin An and Eric Rutten and Jean-Philippe Diguet and
Abdoulaye Gamati{\'e}",
title = "Model-Based Design of Correct Controllers for
Dynamically Reconfigurable Architectures",
journal = j-TECS,
volume = "15",
number = "3",
pages = "51:1--51:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2873056",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Dynamically reconfigurable hardware has been
identified as a promising solution for the design of
energy-efficient embedded systems. However, its
adoption is limited by costly design effort, including
verification and validation, which is even more complex
than for nondynamically reconfigurable systems. In this
article, we propose a tool-supported formal method to
automatically design a correct-by-construction control
of the reconfiguration. By representing system
behaviors with automata, we exploit automated
algorithms to synthesize controllers that safely
enforce reconfiguration strategies formulated as
properties to be satisfied by control. We design
generic modeling patterns for a class of reconfigurable
architectures, taking into account both hardware
architecture and applications, as well as relevant
control objectives. We validate our approach on two
case studies implemented on FPGAs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "51",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hilal:2016:CEA,
author = "Allaa R. Hilal and Otman Basir",
title = "A Collaborative Energy-Aware Sensor Management System
Using Team Theory",
journal = j-TECS,
volume = "15",
number = "3",
pages = "52:1--52:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2910574",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With limited battery supply, power is a scarce
commodity in wireless sensor networks. Thus, to prolong
the lifetime of the network, it is imperative that the
sensor resources are managed effectively. This task is
particularly challenging in heterogeneous sensor
networks for which decisions and compromises regarding
sensing strategies are to be made under time and
resource constraints. In such networks, a sensor has to
reason about its current state to take actions that are
deemed appropriate with respect to its mission, its
energy reserve, and the survivability of the overall
network. Sensor Management controls and coordinates the
use of the sensory suites in a manner that maximizes
the success rate of the system in achieving its
missions. This article focuses on formulating and
developing an autonomous energy-aware sensor management
system that strives to achieve network objectives while
maximizing its lifetime. A team-theoretic formulation
based on the Belief-Desire-Intention (BDI) model and
the Joint Intention theory is proposed as a mechanism
for effective and energy-aware collaborative
decision-making. The proposed system models the
collective behavior of the sensor nodes using the Joint
Intention theory to enhance sensors' collaboration and
success rate. Moreover, the BDI modeling of the sensor
operation and reasoning allows a sensor node to adapt
to the environment dynamics, situation-criticality
level, and availability of its own resources. The
simulation scenario selected in this work is the
surveillance of the Waterloo International Airport.
Various experiments are conducted to investigate the
effect of varying the network size, number of threats,
threat agility, environment dynamism, as well as
tracking quality and energy consumption, on the
performance of the proposed system. The experimental
results demonstrate the merits of the proposed approach
compared to the state-of-the-art centralized approach
adapted from Atia et al. [2011] and the localized
approach in Hilal and Basir [2015] in terms of energy
consumption, adaptability, and network lifetime. The
results show that the proposed approach has 12 $ \times
$ less energy consumption than that of the popular
centralized approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "52",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ungerer:2016:PIH,
author = "Theo Ungerer and Christian Bradatsch and Martin Frieb
and Florian Kluge and J{\"o}rg Mische and Alexander
Stegmeier and Ralf Jahr and Mike Gerdes and Pavel
Zaykov and Lucie Matusova and Zai Jian Jia Li and
Zlatko Petrov and Bert B{\"o}ddeker and Sebastian Kehr
and Hans Regler and Andreas Hugl and Christine Rochange
and Haluk Ozaktas and Hugues Cass{\'e} and Armelle
Bonenfant and Pascal Sainrat and Nick Lay and David
George and Ian Broster and Eduardo Qui{\~n}ones and
Milos Panic and Jaume Abella and Carles Hernandez and
Francisco Cazorla and Sascha Uhrig and Mathias Rohde
and Arthur Pyka",
title = "Parallelizing Industrial Hard Real-Time Applications
for the {parMERASA} Multicore",
journal = j-TECS,
volume = "15",
number = "3",
pages = "53:1--53:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2910589",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The EC project parMERASA (Multicore Execution of
Parallelized Hard Real-Time Applications Supporting
Analyzability) investigated timing-analyzable parallel
hard real-time applications running on a predictable
multicore processor. A pattern-supported
parallelization approach was developed to ease
sequential to parallel program transformation based on
parallel design patterns that are timing analyzable.
The parallelization approach was applied to parallelize
the following industrial hard real-time programs: 3D
path planning and stereo navigation algorithms
(Honeywell International s.r.o.), control algorithm for
a dynamic compaction machine (BAUER Maschinen GmbH),
and a diesel engine management system (DENSO AUTOMOTIVE
Deutschland GmbH). This article focuses on the
parallelization approach, experiences during
parallelization with the applications, and quantitative
results reached by simulation, by static WCET analysis
with the OTAWA tool, and by measurement-based WCET
analysis with the RapiTime tool.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "53",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tanasa:2016:CAP,
author = "Bogdan Tanasa and Unmesh D. Bordoloi and Petru Eles
and Zebo Peng",
title = "Correlation-Aware Probabilistic Timing Analysis for
the Dynamic Segment of {FlexRay}",
journal = j-TECS,
volume = "15",
number = "3",
pages = "54:1--54:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2870635",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We propose an analytical framework for probabilistic
timing analysis of the event-triggered Dynamic segment
of the FlexRay communication protocol. Specifically,
our framework computes the Deadline Miss Ratio of each
message. The core problem is formulated as a Mixed
Integer Linear Program (MILP). Given the intractability
of the problem, we also propose several techniques that
help to mitigate the running times of our tool. This
includes the re-engineering of the problem to run it on
GPUs as well as reformulating the MILP itself. Most
importantly, we also show how our framework can handle
correlations between the queuing events of messages.
This is challenging because one cannot apply the
convolution operator in the same way as in the case of
independent queuing events.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "54",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yang:2016:BAU,
author = "Ming-Chang Yang and Yuan-Hao Chang and Che-Wei Tsao",
title = "Byte-Addressable Update Scheme to Minimize the Energy
Consumption of {PCM}-Based Storage Systems",
journal = j-TECS,
volume = "15",
number = "3",
pages = "55:1--55:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2910590",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In recent years, phase-change memory (PCM) has
generated a great deal of interest because of its byte
addressability and nonvolatility properties. It is
regarded as a good alternative storage medium that can
reduce the performance gap between the main memory and
the secondary storage in computing systems. However,
its high energy consumption on writes is a challenging
issue in the design of battery-powered mobile computing
systems. To reduce the energy consumption, we exploit
the byte addressability and the asymmetric read-write
energy/latency of PCM in an energy-efficient update
scheme for journaling file systems. We also introduce a
concept called the 50\% rule to determine/recommend the
best update strategy for block updates. The proposed
scheme only writes modified data, instead of the whole
updated block, to PCM-based storage devices without
extra hardware support. Moreover, it guarantees the
sanity/integrity of file systems even if the computing
system crashes or there is a power failure during the
data update process. We implemented the proposed scheme
on the Linux system and conducted a series of
experiments to evaluate the scheme. The results are
very encouraging.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "55",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hu:2016:EIR,
author = "Biao Hu and Kai Huang and Gang Chen and Long Cheng and
Alois Knoll",
title = "Evaluation and Improvements of Runtime Monitoring
Methods for Real-Time Event Streams",
journal = j-TECS,
volume = "15",
number = "3",
pages = "56:1--56:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2890503",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Runtime monitoring is of great importance as a
safeguard to guarantee the correctness of system
runtime behaviors. Two state-of-the-art methods,
dynamic counters and $l$-repetitive function, were
recently developed to tackle the runtime monitoring for
real-time systems. While both are reported to be
efficient in monitoring arbitrary events, the
monitoring performance between them has not yet been
evaluated. This article evaluates both methods in
depth, to identify their strengths and weaknesses. New
methods are proposed to efficiently monitor the
many-to-one connections that are abstracted as AND and
OR components on multiple inputs. Representative
scenarios are used as our case studies to
quantitatively demonstrate the evaluations. Both
methods are implemented in hardware FPGA. The timing
overhead and resource usages of implementing the two
methods are evaluated.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "56",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lu:2016:VCV,
author = "Yaojie Lu and Seyedamin Rooholamin and Sotirios G.
Ziavras",
title = "Vector Coprocessor Virtualization for Simultaneous
Multithreading",
journal = j-TECS,
volume = "15",
number = "3",
pages = "57:1--57:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2898364",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Vector coprocessors (VPs), commonly being assigned
exclusively to a single thread/core, are not often
performance and energy efficient due to mismatches with
the vector needs of individual applications. We present
in this article an easy-to-implement VP virtualization
technique that, when applied, enables a multithreaded
VP to simultaneously execute multiple threads of
similar or arbitrary vector lengths to achieve improved
aggregate utilization. With a vector register file
(VRF) virtualization technique invented to dynamically
allocate physical vector registers to threads, our VP
virtualization approach improves programmer
productivity by providing at runtime a distinct
physical register name space to each competing thread,
thus eliminating the need to solve register-name
conflicts statically. We applied our virtualization
technique to a multithreaded VP and prototyped an
FPGA-based multicore processor system that supports VP
sharing as well as power gating for better energy
efficiency. Under the dynamic creation of disparate
threads, our benchmarking results show impressive VP
speedups of up to 333\% and total energy savings of up
to 37\% with proper thread scheduling and power gating
compared to a similar-sized system that allows VP
access to just one thread at a time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "57",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Seo:2016:HMR,
author = "Hwajeong Seo and Zhe Liu and Yasuyuki Nogami and
Jongseok Choi and Howon Kim",
title = "Hybrid {Montgomery} Reduction",
journal = j-TECS,
volume = "15",
number = "3",
pages = "58:1--58:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2890502",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we present a hybrid method to improve
the performance of the Montgomery reduction by taking
advantage of the Karatsuba technique. We divide the
Montgomery reduction into two sub-parts, including one
for the conventional Montgomery reduction and the other
one for Karatsuba-aided multiplication. This approach
reduces the multiplication complexity of $n$-limb
Montgomery reduction from $ \theta (n^2 + n)$ to
asymptotic complexity $ \theta (7 n^2 / 8 + n)$. Our
practical implementation results over an 8-bit
microcontroller also show performance enhancements by
11\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "58",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Filippopoulos:2016:IEM,
author = "Iason Filippopoulos and Namita Sharma and Francky
Catthoor and Per Gunnar Kjeldsberg and Preeti Ranjan
Panda",
title = "Integrated Exploration Methodology for Data
Interleaving and Data-to-Memory Mapping on {SIMD}
Architectures",
journal = j-TECS,
volume = "15",
number = "3",
pages = "59:1--59:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2894754",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This work presents a methodology for efficient
exploration of data interleaving and data-to-memory
mapping options for Single Instruction Multiple Data
(SIMD) platform architectures. The system architecture
consists of a reconfigurable clustered scratch-pad
memory and a SIMD functional unit, which performs the
same operation on multiple input data in parallel. The
memory accesses contribute substantially to the overall
energy consumption of an embedded system executing a
data intensive task. The scope of this work is the
reduction of the overall energy consumption by
increasing the utilization of the functional units and
decreasing the number of memory accesses. The presented
methodology is tested using a number of benchmark
applications with holes in their access scheme.
Potential gains are calculated based on the energy
models, both for the processing and the memory part of
the system. The reduction in energy consumption after
efficient interleaving and mapping of data is between
40\% and 80\% for the complete system and the studied
benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "59",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ferreira:2016:LRF,
author = "Ronaldo R. Ferreira and Gabriel L. Nazar and Jean {Da
Rolt} and {\'A}lvaro F. Moreira and Luigi Carro",
title = "Live-Out Register Fencing: Interrupt-Triggered Soft
Error Correction Based on the Elimination of
Register-to-Register Communication",
journal = j-TECS,
volume = "15",
number = "3",
pages = "60:1--60:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2873058",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article introduces Live-Out Register Fencing
(LoRF), a soft error correction mechanism that uses the
novel Spill Register File as a container of
checkpointing data. LoRF's Spill Register File holds
the values shared among basic blocks in the program,
and, coupled with a new compilation strategy, LoRF
allows for error correction in the same basic block
where the error was detected. In LoRF, error correction
is triggered by a hardware interrupt that restores the
registers of a basic block from the Spill Register
File. After these registers are restored, the basic
block where the error was detected can just be
re-executed, thus reducing the costs of error recovery.
LoRF's error correction policy eliminates the need for
expensive architectural support for checkpointing and
rollback, reducing the performance overhead of online
soft error correction. LoRF relies on both a modified
processor architecture and a corresponding compiler.
The architecture was implemented in synthesizable VHDL,
whereas the compiler was developed as an extension of
the LLVM framework. Fault injection experiments support
an error correction coverage of 99.35\% and a mean
performance overhead of 1.33 for the entire life cycle
of an error from its occurrence to its elimination from
the system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "60",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Khalid:2016:RHL,
author = "Ayesha Khalid and Goutam Paul and Anupam Chattopadhyay
and Faezeh Abediostad and Syed Imad Ud Din and Muhammad
Hassan and Baishik Biswas and Prasanna Ravi",
title = "{RunStream}: a High-Level Rapid Prototyping Framework
for Stream Ciphers",
journal = j-TECS,
volume = "15",
number = "3",
pages = "61:1--61:??",
month = jul,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2891412",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jul 21 17:18:13 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present RunStream, a rapid prototyping framework
for realizing stream cipher implementations based on
algorithmic specifications and architectural
customizations desired by the users. In the dynamic
world of cryptography where newer recommendations are
frequently proposed, the need of such tools is
imperative. It carries out design validation and
generates an optimized software implementation and a
synthesizable Register Transfer Level Verilog
description. Our framework enables speedy benchmarking
against critical resources like area, throughput,
power, and latency and allows exploration of
alternatives. Using RunStream, we successfully
implemented various stream ciphers and benchmarked the
quality of results to be at par with published
hand-optimized implementations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "61",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2016:ESE,
author = "Sandeep K. Shukla",
title = "Editorial: Security of Embedded Systems and Cyber
Irons --- Embedded Systems for Security",
journal = j-TECS,
volume = "15",
number = "4",
pages = "62:1--62:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2976731",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "62",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Eles:2016:GES,
author = "Petru Eles and Rolf Ernst",
title = "Guest Editorial for Special Issue of {ESWEEK 2015}",
journal = j-TECS,
volume = "15",
number = "4",
pages = "63:1--63:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2968218",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "63",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{You:2016:VVA,
author = "Yi-Ping You and Szu-Chien Chen",
title = "{VecRA}: a Vector-Aware Register Allocator for {GPU}
Shader Processors",
journal = j-TECS,
volume = "15",
number = "4",
pages = "64:1--64:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2961026",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Graphics processing units (GPUs) are now widely used
in embedded systems for manipulating computer graphics
and even for general-purpose computation. However, many
embedded systems have to manage highly restricted
hardware resources in order to achieve high performance
or energy efficiency. The number of registers is one of
the common limiting factors in an embedded GPU design.
Programs that run with a low number of registers may
suffer from high register pressure if register
allocation is not properly designed, especially on a
GPU in which a register is divided into four elements
and each element can be accessed separately, because
allocating a register for a vector-type variable that
does not contain values in all elements wastes register
spaces. In this article, we present a vector-aware
register allocation framework to improve register
utilization on shader architectures. The framework
involves two major components: (1) element-based
register allocation that allocates registers based on
the element requirement of variables and (2) register
packing that rearranges elements of registers in order
to increase the number of contiguous free elements,
thereby keeping more live variables in registers.
Experimental results on a cycle-approximate simulator
showed that the proposed framework decreased 92\% of
register spills in total and made 91.7\% of 14 common
shader programs spill free. These results indicate an
opportunity for energy management of the space that is
used for storing spilled variables, with the framework
improving the performance by a geometric mean of 8.3\%,
16.3\%, and 29.2\% for general shader processors in
which variables are spilled to memory with 5-, 10-, and
20-cycle access latencies, respectively. Furthermore,
the reduction in the register requirement of programs
enabled another 11 programs with high register pressure
to be runnable on a lightweight GPU.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "64",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2016:ETA,
author = "Weichen Liu and Chunhua Xiao",
title = "An Efficient Technique of Application Mapping and
Scheduling on Real-Time Multiprocessor Systems for
Throughput Optimization",
journal = j-TECS,
volume = "15",
number = "4",
pages = "65:1--65:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950051",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Multiprocessor systems are becoming ubiquitous in
today's embedded systems design. In this article, we
address the problem of mapping an application
represented by a Homogeneous Synchronous Dataflow
(HSDF) graph onto a real-time multiprocessor platform
with the objective of maximizing total throughput. We
propose that the optimal solution to the problem is
composed of three components: actor-to-processor
mapping, retiming, and actor ordering on each
processor. The entire problem is systematically modeled
into a Boolean Satisfiability (SAT) problem such that
the optimal solution can be guaranteed theoretically.
In order to explore the vast solution space more
efficiently, we develop a specific HSDF theory solver
based on the special characteristics of the timed HSDF,
and integrate it into the general search framework of
the SAT solver. Two alternative integration methods
based on branch-and-bound are presented to achieve
early branch pruning in the search space; thus, the
scalability is greatly improved. Extensive performance
evaluation on synthetic examples and a case study on
the realistic H.264 Video Decoder show that our
approach provides as much as 76.9\% throughput
improvement, and is scalable to industry-sized
applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "65",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Duraisamy:2016:HPE,
author = "Karthi Duraisamy and Hao Lu and Partha Pratim Pande
and Ananth Kalyanaraman",
title = "High-Performance and Energy-Efficient Network-on-Chip
Architectures for Graph Analytics",
journal = j-TECS,
volume = "15",
number = "4",
pages = "66:1--66:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2961027",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With its applicability spanning numerous data-driven
fields, the implementation of graph analytics on
multicore platforms is gaining momentum. One of the
most important components of a multicore chip is its
communication backbone. Due to inherent irregularities
in data movements manifested by graph-based
applications, it is essential to design efficient
on-chip interconnection architectures for multicore
chips performing graph analytics. In this article, we
present a detailed analysis of the traffic patterns
generated by graph-based applications when mapped to
multicore chips. Based on this analysis, we explore the
design-space for the Network-on-Chip (NoC) architecture
to enable an efficient implementation of graph
analytics. We principally consider three types of NoC
architectures, viz., traditional mesh, small-world, and
high-radix networks. We demonstrate that the
small-world-network-enabled wireless NoC (WiNoC) is the
most suitable platform for executing the considered
graph applications. The WiNoC achieves an average of
38\% and 18\% full-system Energy Delay Product savings
compared to wireline-mesh and high-radix NoCs,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "66",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kriebel:2016:RAA,
author = "Florian Kriebel and Semeen Rehman and Arun
Subramaniyan and Segnon Jean Bruno Ahandagbe and
Muhammad Shafique and J{\"o}rg Henkel",
title = "Reliability-Aware Adaptations for Shared Last-Level
Caches in Multi-Cores",
journal = j-TECS,
volume = "15",
number = "4",
pages = "67:1--67:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2961059",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "On account of their large footprint, on-chip
last-level caches in multi-core systems are one of the
most vulnerable components to soft errors. However,
vulnerability to soft errors highly depends on the
configuration and parameters of the last-level cache,
especially when executing different applications
concurrently. In this article we propose a novel
reliability-aware reconfigurable last-level cache
architecture (R$^2$Cache) and cache vulnerability
model for multi-cores. R$^2$Cache supports various
reliability-wise efficient cache configurations (i.e.,
cache parameter selection and cache partitioning) for
different concurrently executing applications. The
proposed vulnerability model takes into account the
vulnerability of both the data and tag arrays as well
as the active cache area for applications in different
execution phases. To enable runtime adaptations, we
introduce a lightweight online vulnerability predictor
that exploits the knowledge of performance metrics like
number of L2 misses to accurately estimate the cache
vulnerability to soft errors. Based on the predicted
vulnerabilities of different concurrently executing
applications in the current execution epoch, our
runtime reliability manager reconfigures the cache such
that, for the next execution epoch, the total
vulnerability for all concurrently executing
applications is minimized under user-provided tolerable
performance/energy overheads. In scenarios where
single-bit error correction for cache lines may be
afforded, vulnerability-aware reconfigurations can be
leveraged to increase the reliability of the last-level
cache against multi-bit errors. Compared to
state-of-the-art vulnerability-minimizing and
reconfigurable caches, the proposed architecture
provides 35.27\% and 23.42\% vulnerability savings,
respectively, when averaged across numerous
experiments, while reducing the vulnerability by more
than 65\% and 60\%, respectively, for selected
applications and application phases.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Spasic:2016:IHR,
author = "Jelena Spasic and Di Liu and Emanuele Cannella and
Todor Stefanov",
title = "On the Improved Hard Real-Time Scheduling of
Cyclo-Static Dataflow",
journal = j-TECS,
volume = "15",
number = "4",
pages = "68:1--68:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2932188",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recently, it has been shown that the hard real-time
scheduling theory can be applied to streaming
applications modeled as acyclic Cyclo-Static Dataflow
(CSDF) graphs. However, this recent approach is not
always efficient in terms of throughput and processor
utilization. Therefore, in this article, we propose an
improved hard real-time scheduling approach to schedule
streaming applications modeled as acyclic CSDF graphs
on a Multiprocessor System-on-Chip (MPSoC) platform.
The proposed approach converts each actor in a CSDF
graph to a set of real-time periodic tasks. The
conversion enables application of many hard real-time
scheduling algorithms that offer fast calculation of
the required number of processors for scheduling the
tasks. In addition, we propose a method to reduce the
graph latency when the converted tasks are scheduled as
real-time periodic tasks. We evaluate the performance
and time complexity of our approach in comparison to
several existing scheduling approaches. Experiments on
a set of real-life streaming applications demonstrate
that our approach (1) results in systems with higher
throughput and better processor utilization in
comparison to the existing hard real-time scheduling
approach for CSDF graphs, while requiring comparable
time for the system derivation; (2) delivers shorter
application latency by applying the proposed method for
graph latency reduction while providing better
throughput and processor utilization when compared to
the existing hard real-time scheduling approach; (3)
gives the same throughput as the existing periodic
scheduling approach for CSDF graphs, but requires much
shorter time to derive the task schedule and tasks'
parameters (periods, start times, and so on); and (4)
gives the throughput that is equal to or very close to
the maximum achievable throughput of an application
obtained via self-timed scheduling, but requires much
shorter time to derive the schedule. The total time
needed for the proposed conversion approach and the
calculation of the minimum number of processors needed
to schedule the tasks and the calculation of the size
of communication buffers between tasks is in the range
of seconds.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "68",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Allamigeon:2016:SAM,
author = "Xavier Allamigeon and St{\'e}phane Gaubert and Nikolas
Stott and {\'E}ric Goubault and Sylvie Putot",
title = "A Scalable Algebraic Method to Infer Quadratic
Invariants of Switched Systems",
journal = j-TECS,
volume = "15",
number = "4",
pages = "69:1--69:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2932187",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present a new numerical abstract domain based on
ellipsoids designed for the formal verification of
switched linear systems. Unlike the existing
approaches, this domain does not rely on a user-given
template. We overcome the difficulty that ellipsoids do
not have a lattice structure by exhibiting a canonical
operator over-approximating the union. This operator is
the only one that permits the performance of analyses
that are invariant with respect to a linear
transformation of state variables. It provides the
minimum volume ellipsoid enclosing two given
ellipsoids. We show that it can be computed in
$O(n^3)$ elementary algebraic operations. We finally
develop a fast nonlinear power-type algorithm, which
allows one to determine sound quadratic invariants on
switched systems in a tractable way, by solving
fixed-point problems over the space of ellipsoids. We
test our approach on several benchmarks, and compare it
with the standard techniques based on linear matrix
inequalities, showing an important speedup on typical
instances.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "69",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2016:SAR,
author = "Xueguang Wu and Liqian Chen and Antoine Min{\'e} and
Wei Dong and Ji Wang",
title = "Static Analysis of Runtime Errors in Interrupt-Driven
Programs via Sequentialization",
journal = j-TECS,
volume = "15",
number = "4",
pages = "70:1--70:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2914789",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Embedded software often involves intensive numerical
computations and suffers from a number of runtime
errors. The technique of numerical static analysis is
of practical importance for checking the correctness of
embedded software. However, most of the existing
approaches of numerical static analysis consider
sequential programs, while interrupts are a commonly
used facility that introduces concurrency in embedded
systems. Therefore, a numerical static analysis
approach is highly desired for embedded software with
interrupts. In this article, we propose a static
analysis approach specifically for interrupt-driven
programs based on sequentialization techniques. We
present a method to sequentialize interrupt-driven
programs into nondeterministic sequential programs
according to the semantics of interrupts. The key
benefit of using sequentialization is the ability to
leverage the power of state-of-the-art analysis and
verification techniques for sequential programs to
analyze interrupt-driven programs, for example, the
power of numerical abstract interpretation to analyze
numerical properties of the sequentialized programs.
Furthermore, to improve the analysis precision and
scalability, we design specific abstract domains to
analyze sequentialized interrupt-driven programs by
considering their specific features. Finally, we
present encouraging experimental results obtained by
our prototype implementation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "70",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Baudart:2016:LTT,
author = "Guillaume Baudart and Albert Benveniste and Timothy
Bourke",
title = "Loosely Time-Triggered Architectures: Improvements and
Comparisons",
journal = j-TECS,
volume = "15",
number = "4",
pages = "71:1--71:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2932189",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Loosely Time-Triggered Architectures (LTTAs) are a
proposal for constructing distributed embedded control
systems. They build on the quasi-periodic architecture,
where computing units execute nearly periodically, by
adding a thin layer of middleware that facilitates the
implementation of synchronous applications. In this
article, we show how the deployment of a synchronous
application on a quasi-periodic architecture can be
modeled using a synchronous formalism. Then we detail
two protocols, Back-Pressure LTTA, reminiscent of
elastic circuits, and Time-Based LTTA, based on
waiting. Compared to previous work, we present
controller models that can be compiled for execution, a
simplified version of the Time-Based protocol and
optimizations for systems using broadcast
communication. We also compare the LTTA approach with
architectures based on clock synchronization.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "71",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shen:2016:UAS,
author = "Jie Shen and Yingjue Cai and Yang Ren and Xiao Yang",
title = "A Universal Application Storage System Based on Smart
Card",
journal = j-TECS,
volume = "15",
number = "4",
pages = "72:1--72:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2886116",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Nowadays, electronic commerce (e-commerce) has brought
facilitation to people's daily lives. Smart-card-based
systems are widely used as an implementation, where
smart cards act as a secure carrier for small-sized
data. However, most of these systems are developed and
managed by each service provider individually and
repeatedly, which causes both unnecessary work and
difficulties in future maintenance. Besides, advantages
of smart card technology are not full-fledged for the
lack of enough consideration in flexibility and
security. To propose a solution, this article presents
a Universal Application Storage System, including card
side, terminal side, and back-end system. The card side
provides a universal and secured infrastructure for
data storage, where data are organized and stored in a
card file system with several security mechanisms. In
the terminal side, a framework for accessing various
forms of secure element is presented to simplify the
procedures involved in manipulating smart cards.
Through this framework, the back-end system is able to
establish a direct connection to the card, and performs
authorized operations by exchanging commands in a
secure channel. The validity of the proposed system is
verified at the end of this article, illustrated by an
e-coupon system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "72",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hassan:2016:HSB,
author = "Hadeer A. Hassan and Sameh A. Salem and Ahmed M.
Mostafa and E. M. Saad",
title = "Harmonic Segment-Based Semi-Partitioning Scheduling on
Multi-Core Real-Time Systems",
journal = j-TECS,
volume = "15",
number = "4",
pages = "73:1--73:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2933388",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Nowadays, the issue of scheduling multi-core real-time
systems has become the focus of such research in
industrial, biomedical, military, and other fields. As
a consequence, a new semi-partitioning algorithm that
uses a static Rate-Monotonic criterion to schedule
real-time tasks on multi-core platforms is proposed.
The improvement in the performance of real-time systems
is achieved by exploiting the fact that the utilization
boundary of a task set increases to fully utilize the
processors if the periods of tasks have harmonic nature
among each other. Experimental results on randomly
generated datasets and real-world datasets show that
the proposed algorithm inevitably outperforms other
competitive algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "73",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2016:JJO,
author = "Chin-Hsien Wu and Syuan-An Chen",
title = "{JOM}: a Joint Operation Mechanism for {NAND} Flash
Memory",
journal = j-TECS,
volume = "15",
number = "4",
pages = "74:1--74:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2915916",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In the storage systems of NAND flash memory, an
intermediate software called a Flash Translation Layer
(FTL) is adopted to hide the characteristics of NAND
flash memory and provide efficient management for NAND
flash memory. Current flash translation layers can be
classified into a page-mapping FTL, a block-mapping
FTL, and a hybrid-mapping FTL. In order to utilize the
advantages of the page-mapping FTL and the
block-mapping FTL, the hybrid-mapping FTL is proposed
to store data to the appropriate mapping mechanism by
switching the mapping information between the
page-mapping mechanism and the block-mapping mechanism.
In the article, we propose a joint operation mechanism
to rethink the advantages of the page-mapping FTL, the
block-mapping FTL, and the hybrid-mapping FTL. With the
joint operation mechanism, a flash translation layer
can consider the main memory requirements, improve the
system performance, and reduce the garbage collection
overhead. The experimental results show that the
proposed joint operation mechanism can achieve the goal
under realistic workloads and benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chiew:2016:NEI,
author = "Wei Ming Chiew and Feng Lin and Hock Soon Seah",
title = "A Novel Embedded Interpolation Algorithm with Negative
Squared Distance for Real-Time Endomicroscopy",
journal = j-TECS,
volume = "15",
number = "4",
pages = "75:1--75:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2905367",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Interpolation is the most executed operation and one
of the main bottlenecks in embedded imaging,
registration, and rendering systems. Existing methods
either lack parallelization and scalability
capabilities or are too computationally complex to
execute efficiently. Acknowledging that improving
execution time leads to degradation in image quality,
we formulate a novel Negative Squared Distance (NSD)
interpolation method that exhibits excellent
performance by exploiting Look-Up Table (LUT)
optimization for Field Programmable Gate Array (FPGA)
speedup, with a balanced trade-off in quality in our
embedded endomicroscopic imaging system. Quantitative
analysis on performance and resource utilization of NSD
against existing methods is reported through an
implementation on a Xilinx ML605 platform. Functional
validation using practical image resizing and rotation
applications to compare qualitative performance against
existing algorithms is performed and presented with
visual and numerical results. Our method is shown to
have a smaller design size and produces a maximum
throughput of over twofold against trilinear
interpolation with on-par image quality as the baseline
method.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "75",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lin:2016:CFQ,
author = "Chun-Han Lin and Chih-Kai Kang and Pi-Cheng Hsiu",
title = "{CURA}: a Framework for Quality-Retaining Power Saving
on Mobile {OLED} Displays",
journal = j-TECS,
volume = "15",
number = "4",
pages = "76:1--76:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2909875",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Organic Light-Emitting Diode (OLED) technology is
regarded as a promising alternative to mobile displays.
In this article, we introduce the design, algorithm,
and implementation of a novel framework called CURA for
quality-retaining power saving on mobile OLED displays.
First, we link human visual attention to OLED power
saving and model the OLED image scaling optimization
problem. The objective is to minimize the power
required to display an image without adversely
impacting the user's visual experience. Then, we
present the algorithm used to solve the modeled
problem, and prove its optimality even without an
accurate power model. Finally, based on the framework,
we implement two practical applications on a commercial
OLED mobile tablet. The results of experiments
conducted on the tablet with real images demonstrate
that CURA can reduce significant OLED power consumption
while retaining the visual quality of images.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "76",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hester:2016:PCB,
author = "Josiah Hester and Nicole Tobias and Amir Rahmati and
Lanny Sitanayah and Daniel Holcomb and Kevin Fu and
Wayne P. Burleson and Jacob Sorber",
title = "Persistent Clocks for Batteryless Sensing Devices",
journal = j-TECS,
volume = "15",
number = "4",
pages = "77:1--77:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2903140",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Sensing platforms are becoming batteryless to enable
the vision of the Internet of Things, where trillions
of devices collect data, interact with each other, and
interact with people. However, these batteryless
sensing platforms --- that rely purely on energy
harvesting --- are rarely able to maintain a sense of time
after a power failure. This makes working with sensor
data that is time sensitive especially difficult. We
propose two novel, zero-power timekeepers that use
remanence decay to measure the time elapsed between
power failures. Our approaches compute the elapsed time
from the amount of decay of a capacitive device, either
on-chip Static Random-Access Memory (SRAM) or a
dedicated capacitor. This enables hourglass-like timers
that give intermittently powered sensing devices a
persistent sense of time. Our evaluation shows that
applications using either timekeeper can keep time
accurately through power failures as long as 45 s with
low overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "77",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xi:2016:FSS,
author = "Kai Xi and Jiankun Hu and B. V. K. Vijaya Kumar",
title = "{FE-SViT}: a {SViT}-Based Fuzzy Extractor Framework",
journal = j-TECS,
volume = "15",
number = "4",
pages = "78:1--78:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2930669",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "As a promising bio-cryptographic technique, the fuzzy
extractor seamlessly binds biometrics and cryptography
for template protection and key generation. However,
most existing methods hardly solve the following issues
simultaneously: (1) Fingerprint registration, (2)
Verification accuracy, (3) Security strength, and (4)
Computational efficiency. In this article, we introduce
a bio-crypto-oriented fingerprint verification scheme
--- Selective Vertex-indexed Triangulation (SViT) which
maps minutia global topology to local triangulation
with minimum information loss. Then, a SViT-based fuzzy
extractor framework (FE-SViT) is proposed and high
verification accuracy is achieved. The FE-SViT is
highly parallelizable and efficient which makes it
suitable for embedded devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "78",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Olivier:2016:MEP,
author = "Pierre Olivier and Jalil Boukhobza and Eric Senn and
Hamza Ouarnoughi",
title = "A Methodology for Estimating Performance and Power
Consumption of Embedded Flash File Systems",
journal = j-TECS,
volume = "15",
number = "4",
pages = "79:1--79:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2903139",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Sep 1 16:03:45 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In the embedded systems domain, obtaining performance
and power consumption estimations is extremely valuable
in numerous cases. This is particularly true during the
design stage, as designers of complex embedded systems
face an increasingly large design space. Secondary
storage is a well-known performance bottleneck and has
also been reported as an important factor of power
consumption. Flash memory is the main secondary storage
media in an embedded system and exhibits specific
constraints in its usage. One popular way to manage
these constraints is to use dedicated Flash File
Systems (FFS). In this article, we propose a
methodology to estimate the performance and power
consumption of applicative I/Os on an FFS-based storage
system within embedded Linux. The methodology is
divided into three sequential steps. In the exploration
phase, the main factors of an FFS storage system
impacting performance and power consumption are
identified. In the modeling phase, this impact is
formalized into models. Finally, in the last phase, the
models are implemented in a simulator named OpenFlash.
OpenFlash allows obtaining performance and power
consumption estimations for an applicative workload
processed by the Linux FFS storage stack on an embedded
platform. The simulator is validated against real
measurements and the estimation error stays below
10\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2016:EDP,
author = "Sandeep K. Shukla",
title = "Editorial: Distributed Public Ledgers and Block Chains
--- What Good Are They for Embedded Systems?",
journal = j-TECS,
volume = "16",
number = "1",
pages = "1:1--1:2",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/3001902",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Castrillon:2016:GES,
author = "Jeronimo Castrillon and Cristina Silvano",
title = "Guest Editorial: Special Issue on {Virtual Prototyping
of Parallel and Embedded Systems (ViPES)}",
journal = j-TECS,
volume = "16",
number = "1",
pages = "2:1--2:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2991466",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Bortolotti:2016:VRT,
author = "Daniele Bortolotti and Andrea Marongiu and Luca
Benini",
title = "{VirtualSoC}: a Research Tool for Modern {MPSoCs}",
journal = j-TECS,
volume = "16",
number = "1",
pages = "3:1--3:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2930665",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Architectural heterogeneity has proven to be an
effective design paradigm to cope with an
ever-increasing demand for computational power within
tight energy budgets, in virtually every computing
domain. Programmable manycore accelerators are
currently widely used not only in high-performance
computing systems, but also in embedded devices, in
which they operate as coprocessors under the control of
a general-purpose CPU (the host processor). Clearly,
such powerful hardware architectures are paired with
sophisticated and complex software ecosystems, composed
of operating systems, programming models plus
associated runtime engines, and increasingly complex
user applications with related libraries. System
modeling has always played a key role in early
architectural exploration or software development when
the real hardware is not available. The necessity of
efficiently coping with the huge HW/SW design space
provided by the described heterogeneous Systems on Chip
(SoCs) calls for advanced full-system simulation
methodologies and tools, capable of assessing various
metrics for the functional and nonfunctional properties
of the target system. In this article, we describe
VirtualSoC, a simulation tool targeting the full-system
simulation of massively parallel heterogeneous SoCs. We
also describe how VirtualSoC has been successfully
adopted in several research projects.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Wehner:2016:SRM,
author = "Philipp Wehner and Jens Rettkowski and Tobias Kalb and
Diana G{\"o}hringer",
title = "Simulating Reconfigurable Multiprocessor
Systems-on-Chip with {MPSoCSim}",
journal = j-TECS,
volume = "16",
number = "1",
pages = "4:1--4:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2972952",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Upcoming reconfigurable Multiprocessor Systems-on-Chip
(MPSoCs) present new challenges for the design and
early estimation of technology requirements due to
their runtime adaptive hardware architecture. The usage
of simulators offers capabilities to overcome these
issues. In this article, MPSoCSim, a SystemC simulator
for Network-on-Chip (NoC) based MPSoCs is extended to
support the simulation of reconfigurable MPSoCs.
Processors, such as ARM and MicroBlaze, and peripheral
models used within the virtual platform are provided by
Imperas/OVP and attached to the NoC. Moreover, traffic
generators are available to analyze the system. The
virtual platform currently supports mesh topology with
wormhole switching and several routing algorithms such
as XY-, a minimal West-First algorithm, and an adaptive
West-First algorithm. Amongst the impact of routing
algorithms regarding performance, reconfiguration
processes can be examined using the presented
simulator. A mechanism for dynamic partial
reconfiguration is implemented that is oriented towards
the reconfiguration scheme on real FPGA platforms. It
includes the simulation of the undefined behavior of
the hardware region during reconfiguration and allows
the adjustment of parameters. During runtime, dynamic
partial reconfiguration interfaces are used to connect
the Network-on-Chip infrastructure with reconfigurable
regions. The configuration access ports can be modeled
by the controller for the dynamic partial
reconfiguration in form of an application programming
interface. An additional SystemC component enables the
readout of simulation time from within the application.
For evaluation of the simulator timing and power
consumption of the simulated hardware are estimated and
compared with a real hardware implementation on a
Xilinx Zynq FPGA. The comparison shows that the
simulator improves the development of reconfigurable
MPSoCs by early estimation of system requirements. The
power estimations show a maximum deviation of 9 mW at
1.9 W total power consumption.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Sauer:2016:LFD,
author = "Christian Sauer and Hans-Peter Loeb",
title = "A Lightweight Framework for the Dynamic Creation and
Configuration of Virtual Platforms in {SystemC}",
journal = j-TECS,
volume = "16",
number = "1",
pages = "5:1--5:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2983626",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Virtual prototypes leverage SystemC/TLM for simulating
programmable platforms comprising hundreds of modules.
Their efficient creation and configuration is vital for
acceptable turnaround times, for example, during
performance exploration or software development.
Therefore, our lightweight framework provides a factory
that creates designs from abstract descriptions of
module instances, properties, and connections. Modules
mark properties as creation or runtime parameters. The
resulting generic design descriptions are usable by
non-experts and enable front-ends. The infrastructure
is a small C++ library with only 1,350 lines of code
that can be combined with existing SystemC/TLM models
and simulation kernels. An industrial case study of a
complex multiprocessor SoC shows a distinct
productivity gain.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Meyer:2016:SSC,
author = "Rolf Meyer and Jan Wagner and Bastian Farkas and Sven
Horsinka and Patrick Siegl and Rainer Buchty and Mladen
Berekovic",
title = "A Scriptable Standard-Compliant Reporting and Logging
Framework for {SystemC}",
journal = j-TECS,
volume = "16",
number = "1",
pages = "6:1--6:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2983623",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With the ever-increasing complexity of digital
designs, debugging and evaluation face likewise
increasing challenges. While recent advances in
hardware/software co-simulation have been made,
solutions for corresponding debugging and evaluation
did not mature and improve in a similar fashion. In
this article, we present a dedicated solution to ease
the debugging and evaluation efforts, particularly
focusing on full-system simulation. Improving
significantly over existing solutions, the presented
approach features a standards-compliant powerful and
flexible method of deriving, logging, and filtering
detailed status information from SystemC-based models.
At the core of this approach are flexible scripting
capabilities that may change all logging parameters
during runtime, thus not requiring re-compiling the
to-be-simulated model, as in many competing solutions.
The approach is tested and benchmarked with a
real-world full-system example, demonstrating the
overall benefits. The presented solution is published
as open source via github (see text) and, by strictly
adhering to existing standards, is generally compatible
with existing SystemC simulation environments.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Murillo:2016:MSD,
author = "Luis Gabriel Murillo and R{\'o}bert Lajos B{\"u}cs and
Rainer Leupers and Gerd Ascheid",
title = "{MPSoC} Software Debugging on Virtual Platforms via
Execution Control with Event Graphs",
journal = j-TECS,
volume = "16",
number = "1",
pages = "7:1--7:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950052",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Virtual Platforms (VPs) are advantageous to develop
and debug complex software for multi- and
many-processor systems-on-chip (MPSoCs). VPs provide
unrivaled controllability and visibility of the target,
which can be exploited to examine bugs that cannot be
reproduced easily in real hardware (e.g., bugs
originating from races or happening during a processor
stand-by state). However, VPs as employed in practice
for debugging are generally underutilized. The
accompanying debug ecosystem is based mostly on
traditional tools, such as step-based debuggers and
traces, that fall short to address the enormous
complexity of modern MPSoCs and their parallel
software. Finding a bug is still largely left to the
developer's experience and intuition, using manual
means rather than automated or systematic solutions
that exploit the controllability and visibility of VPs.
Profiting from VPs for MPSoC software debugging is an
open question. To bridge this gap, this article
presents a novel framework for debug visualization and
execution control that, relying on the many benefits of
VPs, helps to identify and test possible
concurrency-related bug scenarios. The framework allows
examining and steering the target system by
manipulating an abstract graph that highlights relevant
inter-component interactions and dependencies. The
proposed framework reduces the effort required to
understand complex concurrency patterns and helps to
expose bugs. Its efficacy is demonstrated on (i) a
shared memory symmetric multi-processing platform
executing Linux and parallel benchmarks, and (ii) a
distributed automotive system for driver assistance
applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Sotiriou-Xanthopoulos:2016:FIA,
author = "Efstathios Sotiriou-Xanthopoulos and Sotirios Xydis
and Kostas Siozios and George Economakos and Dimitrios
Soudris",
title = "A Framework for Interconnection-Aware Domain-Specific
Many-Accelerator Synthesis",
journal = j-TECS,
volume = "16",
number = "1",
pages = "8:1--8:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2983624",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many-accelerator Systems-on-Chip (SoC) have recently
emerged as a promising platform paradigm that combines
parallelization with heterogeneity, in order to cover
the increasing demands for high performance and energy
efficiency. To exploit the full potential of
many-accelerator systems, automated design verification
and analysis frameworks are required, targeted to both
computational and interconnection optimization.
Accurate simulation of interconnection schemes should
use real stimuli, which are produced from fully
functional nodes, requiring the prototyping of the
processing elements and memories of the
many-accelerator system. In this article, we argue that
the Hierarchical Network-on-Chip (HNoC) scheme forms a
very promising solution for many-accelerator systems in
terms of scalability and data-congestion minimization.
We present a parameterizable SystemC prototyping
framework for HNoCs, targeted to domain-specific
many-accelerator systems. The framework supports the
prototyping of processing elements, memory modules, and
underlying interconnection infrastructure, while it
provides an API for their easy integration to the HNoC.
Finally, it enables holistic system simulation using
real node data. Using as a case study a
many-accelerator system of an MRI pipeline, an analysis
on the proposed framework is presented to demonstrate
the impact of the system parameters on the system.
Through extensive experimental analysis, we show the
superiority of HNoC schemes in comparison to typical
interconnection architectures. Finally, we show that,
adopting the proposed many-accelerator design flow,
significant performance improvements are achieved, from
$ 1.2 \times $ up to $ 26 \times $, as compared to a
x86 software implementation of the MRI pipeline.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Zhu:2016:GES,
author = "Dakai Zhu and Meikang Qiu and Samarjit Chakraborty",
title = "Guest Editorial: Special Issue on Emerging
Technologies in Embedded Software and Systems",
journal = j-TECS,
volume = "16",
number = "1",
pages = "9:1--9:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2991464",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Jayakumar:2016:SMV,
author = "Hrishikesh Jayakumar and Arnab Raha and Vijay
Raghunathan",
title = "Sleep-Mode Voltage Scaling: Enabling {SRAM} Data
Retention at Ultra-Low Power in Embedded
Microcontrollers",
journal = j-TECS,
volume = "16",
number = "1",
pages = "10:1--10:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950054",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In heavily duty-cycled embedded systems, the energy
consumed by the microcontroller in idle mode is often
the bottleneck for battery lifetime. Existing solutions
address this problem by placing the microcontroller in
a low-power (sleep) mode when idle and preserving
application state either by retaining the data in situ
in Static Random Access Memory (SRAM) or by
checkpointing it to Flash. However, both of these
approaches have notable drawbacks. In situ data
retention requires the SRAM to remain powered in sleep
mode, while checkpointing to Flash involves significant
energy and time overheads. This article proposes a new
ultra-low-power sleep mode for microcontrollers that
overcomes the limitations of both of these approaches.
Our technique, Hypnos, is based on the key observation
that the on-chip SRAM in a microcontroller exhibits
100\% data retention even at a much lower supply
voltage (as much as $ 10 \times $ lower) than the
typical operating voltage of the microcontroller.
Hypnos exploits this observation by performing extreme
voltage scaling when the microcontroller is in sleep
mode. We implement and evaluate Hypnos for the TI
MSP430G2452 microcontroller and show that the
Microcontroller (MCU) draws only 26nA in the proposed
sleep mode, which is $ 4 \times $ lower than a baseline
sleep mode that preserves SRAM contents. Further, to
reduce the overheads associated with performing the
voltage scaling, we propose the use of an energy
harvesting source for providing the scaled supply
voltage and demonstrate (using a light sensing
photodiode) that the current consumption in the
proposed sleep mode can be reduced to 1nA, which is $
100 \times $ lower than the current consumption in the
baseline low-power mode. We also show that the decrease
in sleep-mode power consumption translates to a
reduction in application-level energy consumption by as
much as $ 6.45 \times $. By decreasing the average
power consumption to such minuscule levels, Hypnos
takes a significant step forward in making perpetual
systems a reality through the use of energy
harvesting.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "10",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Marz:2016:RPC,
author = "Stephen Marz and Brad {Vander Zanden}",
title = "Reducing Power Consumption and Latency in Mobile
Devices Using an Event Stream Model",
journal = j-TECS,
volume = "16",
number = "1",
pages = "11:1--11:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2964203",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Most consumer-based mobile devices use asynchronous
events to awaken apps. Currently, event handling is
implemented in either an application or an application
framework such as Java's virtual machine (VM) or
Microsoft's {.NET}, and it uses a ``polling loop'' that
periodically queries an event queue to determine if an
event has occurred. These loops must awaken the
process, check for an event, and then put the process
back to sleep many times per second. This constant
arousal prevents the CPU from being put into a deep
sleep state, which increases power consumption.
Additionally, the process cannot check for events while
it sleeps, and this delay in handling events increases
latency, which is the time that elapses between when an
event occurs and when the application responds to the
event. We call this model of event handling a ``pull''
model because it needs to query hardware devices or
software queues in order to ``pull'' events from them.
Recent advances in input devices support direct,
informative interrupts to the kernel when an event
occurs. This allows us to develop a much more efficient
event-handling model called the ``Event Stream Model''
(ESM). This model is a push model that allows a process
to sleep as long as no event occurs but then
immediately awakens a process when an event occurs.
This model eliminates the polling loop, thus
eliminating latency-inducing sleep between polls and
reducing unnecessary power consumption. To work
properly, the ESM model must be implemented in the
kernel rather than in the application. In this article,
we describe how we implemented the ESM model in Android
operating system (OS). Our results show that with the
event stream model, power consumption is reduced by up
to 23.8\% in certain circumstances, and latency is
reduced by an average of 13.6ms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Chen:2016:ICA,
author = "Renhai Chen and Yi Wang and Jingtong Hu and Duo Liu
and Zili Shao and Yong Guan",
title = "Image-Content-Aware {I/O} Optimization for Mobile
Virtualization",
journal = j-TECS,
volume = "16",
number = "1",
pages = "12:1--12:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950059",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Mobile virtualization introduces extra layers in
software stacks, which leads to performance
degradation. Notably, each I/O operation has to pass
through several software layers to reach the
NAND-flash-based storage systems. This article targets
at optimizing I/O for mobile virtualization, since I/O
becomes one of major performance bottlenecks that
seriously affects the performance of mobile devices.
Among all the I/O operations, a large percentage is to
update metadata. Frequently updated metadata not only
degrade overall I/O performance but also severely
reduce flash memory lifetime. In this article, we
propose a novel I/O optimization technique to identify
the metadata of a guest file system that is stored in a
virtual machine image file and frequently updated.
Then, these metadata are stored in a small additional
non-volatile memory (NVM), which is faster and more
endurable to greatly improve flash memory's performance
and lifetime. To the best of our knowledge, this is the
first work to identify the file system metadata from
regular data in a guest OS image file with NVM
optimization. The proposed scheme is evaluated on a
real hardware embedded platform. The experimental
results show that the proposed techniques can improve
write performance to 45.21\% in mobile devices with
virtualization.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Gu:2016:CPP,
author = "Zonghua Gu and Chao Wang and Haibo Zeng",
title = "Cache-Partitioned Preemption Threshold Scheduling",
journal = j-TECS,
volume = "16",
number = "1",
pages = "13:1--13:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950057",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "For preemptive scheduling with shared cache, different
tasks may cause interference in the shared cache,
leading to Cache-Related Preemption Overhead (CRPD).
Cache partitioning can be used to reduce or eliminate
CRPD. We propose integration of cache partitioning and
Preemption Threshold Scheduling to optimize
schedulability for both Fixed-Priority and Earliest
Deadline First scheduling algorithms on a uniprocessor.
We let each subset of tasks assigned the same cache
partition be a nonpreemptive group by assigning the
same preemption threshold to them, which eliminates
CRPD both within each cache partition and between
different cache partitions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Hu:2016:AWM,
author = "Biao Hu and Kai Huang and Gang Chen and Long Cheng and
Alois Knoll",
title = "Adaptive Workload Management in Mixed-Criticality
Systems",
journal = j-TECS,
volume = "16",
number = "1",
pages = "14:1--14:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950058",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Due to the efficient resource usage of integrating
tasks with different criticality onto a shared
platform, the integration with mixed-criticality tasks
is becoming an increasingly important trend in the
design of real-time systems. One challenge in such a
mixed-criticality system is to maximize the service for
low-critical tasks, while meeting the timing
constraints of high-critical tasks. In this article, we
investigate how to adaptively manage the low-critical
workload during runtime to meet both goals, that is,
providing the service for low-critical tasks as much as
possible and guaranteeing the hard real-time
requirements for high-critical tasks. Unlike previous
methods, which enforce an offline bound towards the
low-critical workload, runtime adaptation approaches
are proposed in which the incoming workload of
low-critical tasks is adaptively regulated by
considering the actual demand of high-critical tasks.
This actual demand of the high-critical tasks, in turn,
is adaptively updated using their historical arrival
information. Based on this adaptation scheme, two
scheduling policies---the priority-adjustment policy and
the workload-shaping policy---are proposed to do the
workload management. In order to reduce online
management overhead, a lightweight scheme with $ O (n
\cdot \log (n)) $ complexity is developed. Extensive
simulation results are presented to demonstrate the
effectiveness of our proposed workload management
approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Tuncali:2016:APM,
author = "Cumhur Erkan Tuncali and Georgios Fainekos and
Yann-Hang Lee",
title = "Automatic Parallelization of Multirate Block Diagrams
of Control Systems on Multicore Platforms",
journal = j-TECS,
volume = "16",
number = "1",
pages = "15:1--15:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950055",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article addresses the problem of parallelizing
model block diagrams for real-time embedded
applications on multicore architectures. We describe a
Mixed Integer Linear Programming formulation for
finding a feasible mapping of the blocks to different
CPU cores. For single-rate models, we use an objective
function that minimizes the overall worst-case
execution time. We introduce a set of heuristics to
solve the problem for large models in a reasonable
time. For multirate models, we solve the feasibility
problem for finding a valid mapping. We study the
scalability and efficiency of our approach with
synthetic benchmarks and an engine controller from
Toyota.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Fusella:2016:CAA,
author = "Edoardo Fusella and Alessandro Cilardo",
title = "Crosstalk-Aware Automated Mapping for Optical
Networks-on-Chip",
journal = j-TECS,
volume = "16",
number = "1",
pages = "16:1--16:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2930666",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Optical networks-on-chip (NoCs) provide a promising
answer to address the increasing requirements of
ultra-high bandwidth and extremely low power
consumption. Designing a photonic interconnect,
however, involves a number of challenges that have no
equivalent in the electronic domain, particularly the
crosstalk noise, which affects the signal-to-noise
ratio (SNR) possibly resulting in an inoperable
architecture and hence constraining the network
scalability. In this article, we point out the
implications of application-driven task mapping on
crosstalk effects. We motivate the main rationale of
our work and provide a formalization of the problem.
Then we propose a class of algorithms that
automatically map the application tasks onto a generic
mesh-based photonic NoC architecture such that the
worst-case crosstalk is minimized. We also present a
purpose-built experimental setup used for evaluating
several architectural solutions in terms of crosstalk
noise and SNR. The setup is used to collect extensive
results from several real-world applications and case
studies. The collected results show that the crosstalk
noise can be significantly reduced by adopting our
approach, thereby allowing higher network scalability,
and can exhibit encouraging improvements over
application-oblivious architectures.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Iida:2016:GET,
author = "Yuki Iida and Yusuke Fujii and Takuya Azumi and
Nobuhiko Nishio and Shinpei Kato",
title = "{GPUrpc}: Exploring Transparent Access to Remote
{GPUs}",
journal = j-TECS,
volume = "16",
number = "1",
pages = "17:1--17:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950056",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Graphics processing units (GPUs) are increasingly used
for high-performance computing. Programming frameworks
for general-purpose computing on GPUs (GPGPU), such as
CUDA and OpenCL, are also maturing. Driving this trend
is the recent proliferation of mobile devices such as
smartphones and wearable computers. These devices are
increasingly incorporating computationally intensive
applications that involve some form of environmental
recognition such as augmented reality (AR) or voice
recognition. However, devices with low computational
power cannot satisfy such demanding computing
requirements. The CPU load of these devices could be
reduced by offloading computation onto GPUs on the
cloud. This paper presents GPUrpc, a remote procedure
call (RPC) extension to Gdev, which is a rich set of
runtime libraries and device drivers for achieving
first-class GPU resource management. GPUrpc allows
developers to use CUDA for GPGPU development work.
Existing research uses RPCs based on the CUDA
application programming interfaces (APIs); hence, all
CUDA APIs require communication. To reduce
communication overhead, we use an RPC based on a
low-level API than CUDA API and reduced API that does
not require communication. Our evaluation conducted on
Linux and NVIDIA GPUs shows that the basic performance
of our prototype implementation is reliable in
comparison with the existing method. Evaluation using
the Rodinia benchmark suite designed for research in
heterogeneous parallel computing showed that GPUrpc is
effective for applications such as image processing and
data mining. GPUrpc also can improve power consumption
to approximately 1/6 that of CPU processing for
performing $ 512 \times 512 $ matrix multiplication.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Wang:2016:GTB,
author = "Kun Wang and Miao Du and Dejun Yang and Chunsheng Zhu
and Jian Shen and Yan Zhang",
title = "Game-Theory-Based Active Defense for Intrusion
Detection in Cyber-Physical Embedded Systems",
journal = j-TECS,
volume = "16",
number = "1",
pages = "18:1--18:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2886100",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cyber-Physical Embedded Systems (CPESs) are
distributed embedded systems integrated with various
actuators and sensors. When it comes to the issue of
CPES security, the most significant problem is the
security of Embedded Sensor Networks (ESNs). With the
continuous growth of ESNs, the security of transferring
data from sensors to their destinations has become an
important research area. Due to the limitations in
power, storage, and processing capabilities, existing
security mechanisms for wired or wireless networks
cannot apply directly to ESNs. Meanwhile, ESNs are
likely to be attacked by different kinds of attacks in
industrial scenarios. Therefore, there is a need to
develop new techniques or modify the current security
mechanisms to overcome these problems. In this article,
we focus on Intrusion Detection (ID) techniques and
propose a new attack-defense game model to detect
malicious nodes using a repeated game approach. As a
direct consequence of the game model, attackers and
defenders make different strategies to achieve optimal
payoffs. Importantly, error detection and missing
detection are taken into consideration in Intrusion
Detection Systems (IDSs), where a game tree model is
introduced to solve this problem. In addition, we
analyze and prove the existence of pure Nash
equilibrium and mixed Nash equilibrium. Simulations
show that the proposed model can both reduce energy
consumption by up to 50\% compared with the existing
All Monitor (AM) model and improve the detection rate
by up to 10\% to 15\% compared with the existing
Cluster Head (CH) monitor model.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Tan:2016:SSH,
author = "Song Tan and Wen-Zhan Song and Steve Yothment and
Junjie Yang and Lang Tong",
title = "{ScorePlus}: a Software-Hardware Hybrid and Federated
Experiment Environment for Smart Grid",
journal = j-TECS,
volume = "16",
number = "1",
pages = "19:1--19:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2964200",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present ScorePlus, a software-hardware hybrid and
federated experiment environment for Smart Grid.
ScorePlus incorporates both a software emulator and
hardware testbed, such that they all follow the same
architecture, and the same Smart Grid application
program can be tested on either of them without any
modification; ScorePlus provides a federated
environment such that multiple software emulators and
hardware testbeds at different locations are able to
connect and form a unified Smart Grid system; ScorePlus
software is encapsulated as a resource plugin in the
OpenStack cloud computing platform, such that it
supports massive deployments with large-scale test
cases in cloud infrastructure.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Majmudar:2016:AOR,
author = "Charvi A. Majmudar and Bashir I. Morshed",
title = "Autonomous {OA} Removal in Real-Time from Single
Channel {EEG} Data on a Wearable Device Using a Hybrid
Algebraic-Wavelet Algorithm",
journal = j-TECS,
volume = "16",
number = "1",
pages = "20:1--20:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2983629",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Electroencephalography (EEG) is a non-invasive
technique to record brain activities in natural
settings. Ocular Artifacts (OA) usually contaminates
EEG signals, removal of which is critical for accurate
feature extraction and classification. With the
increasing adoption of wearable technologies,
single-channel real-time EEG systems that often require
real-time signal processing for immediate real-time
feedback are becoming more prevalent. However,
traditional OA removal algorithms usually require
multiple channels of EEG data, are computationally
expensive, and do not perform well in real-time. In
this article, a new hybrid algorithm is proposed that
autonomously detects OA and subsequently removes OA
from a single-channel streaming EEG data in real-time.
The proposed single EEG channel algorithm also does not
require additional reference electrooculography (EOG)
channel. The algorithm has also been implemented on an
embedded hardware platform of single channel wearable
EEG system (NeuroMonitor). The algorithm first detects
the OA zones using an Algebraic approach and then
removes these artifacts from the detected OA zones
using the Discrete Wavelet Transform (DWT)
decomposition method. The de-noising technique is
applied only to the OA zone, which minimizes loss of
neural information outside the OA zone. A qualitative
and quantitative performance evaluation was carried out
with a 0.5s epoch in overlapping sliding window
technique using time-frequency analysis, mean square
coherence, and correlation coefficient statistics. The
hybrid OA removal algorithm demonstrated real-time
operation with 3s latency on the
PSoC-3-microcontroller-based EEG system. Successful
implementation of OA removal from single-channel
real-time EEG data using the proposed algorithm shows
promise for real-time feedback applications of wearable
EEG devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Kuan:2016:SEI,
author = "Yuan-Hung Kuan and Yuan-Hao Chang and Tseng-Yi Chen
and Po-Chun Huang and Kam-Yiu Lam",
title = "Space-Efficient Index Scheme for {PCM}-Based
Multiversion Databases in Cyber-Physical Systems",
journal = j-TECS,
volume = "16",
number = "1",
pages = "21:1--21:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950060",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we study the indexing problem of
using PCM as the storage medium for embedded
multiversion databases in cyber-physical systems
(CPSs). Although the multiversion B$^+$-tree (MVBT)
index has been shown to be efficient in managing
multiple versions of data items in a database, MVBT is
designed for databases residing in traditional
block-oriented storage devices. It can have serious
performance problems when the databases are on
phase-change memory (PCM). Since the embedded
multiversion database in CPSs may have limited storage
space and are update intensive, to resolve the problems
of MVBT of lack of space efficiency and heavy update
cost, we propose a new index scheme, called
space-efficient multiversion index (SEMI), to enhance
the space utilization and access performance in serving
various types of queries. In SEMI, since the number of
keys in the database may be small, instead of using a
B$^+$-tree index, we propose to use a binary-search tree to
organize the index keys. Furthermore, multiple versions
of the same data item may be stored consecutively and
indexed by a single entry to maximize the space
utilization and at the same time to enhance the
performance in serving version-range queries.
Analytical studies have been conducted on SEMI, and a
series of experiments have been performed to evaluate
its performance as compared with MVBT under different
workloads. The experimental results have demonstrated
that SEMI can achieve very high space utilization and
has better performance in serving update transactions
and range queries as compared with MVBT.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Kartal:2016:MDR,
author = "Yusuf Bora Kartal and Ece G{\"u}ran Schmidt and Klaus
Werner Schmidt",
title = "Modeling Distributed Real-Time Systems in {TIOA} and
{UPPAAL}",
journal = j-TECS,
volume = "16",
number = "1",
pages = "22:1--22:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2964202",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The mission- and life-critical properties of
distributed real-time systems require concurrent
modeling, analysis, and formal verification in the
design stage. The timed input/output automata (TIOA)
framework and the UPPAAL software package are two
widely used modeling and verification tools for this
purpose. To this end, we develop the algorithm
TUConvert for converting distributed TIOA models to
UPPAAL behavioral models and formally prove its
correctness. We demonstrate the applicability of our
algorithm by the formal verification of a distributed
real-time industrial communication protocol that is
modeled by TIOA.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Asyaban:2016:ASB,
author = "Sedigheh Asyaban and Mehdi Kargahi and Lothar Thiele
and Morteza Mohaqeqi",
title = "Analysis and Scheduling of a Battery-Less
Mixed-Criticality System with Energy Uncertainty",
journal = j-TECS,
volume = "16",
number = "1",
pages = "23:1--23:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2964201",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We consider a battery-less real-time embedded system
equipped with an energy harvester. It scavenges energy
from an environmental resource according to some
stochastic patterns. The success of jobs is threatened
in the case of energy shortage, which might be due to
lack of harvested energy, losses originated from the
super-capacitor self-discharge, as well as power
consumption of executed tasks. The periodic real-time
tasks of the system follow a dual-criticality model. In
addition, each task has a minimum required success
ratio that needs to be satisfied in steady state. We
analytically evaluate the behavior of such a system in
terms of its energy-related success ratio for a given
schedule. Based on these results, we propose a
scheduling algorithm that satisfies both temporal and
success-ratio constraints of the jobs, while respecting
task criticalities and corresponding system modes. The
accuracy of the analytical method as well as its
dependence on the numerical computations and other
model assumptions are extensively discussed through
comparison with simulation results. Also, the efficacy
of the proposed scheduling algorithm is studied through
comparison to some existing non-mixed- and
mixed-criticality scheduling algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Manna:2016:ITS,
author = "Kanchan Manna and Shivam Swami and Santanu
Chattopadhyay and Indranil Sengupta",
title = "Integrated Through-Silicon Via Placement and
Application Mapping for {$3$D} Mesh-Based {NoC}
Design",
journal = j-TECS,
volume = "16",
number = "1",
pages = "24:1--24:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2968446",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article proposes a solution to the integrated
problem of Through-Silicon Via (TSV) placement and
mapping of cores to the routers in a three-dimensional
mesh-based Network-on-Chip (NoC) system. TSV geometry
restricts their number in three-dimensional (3D) ICs.
As a result, only about 25\% of routers in a 3D NoC can
possess vertical connections. Mapping plays an
important role in evolving good system solutions in
such a situation. TSVs have been placed with detailed
consultation with the application mapping process. The
integrated problem was first solved using the exact
method of Integer Linear Programming (ILP). Next, a
solution was obtained via a Particle Swarm Optimization
(PSO) formulation. Several augmentations to the basic
PSO strategy have been proposed to generate
good-quality solutions. The results obtained are better
than many of the contemporary approaches and close to
the theoretical situation in which all routers are 3D
in nature.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Tajik:2016:SRS,
author = "Hossein Tajik and Bryan Donyanavard and Nikil Dutt and
Janmartin Jahn and J{\"o}rg Henkel",
title = "{SPMPool}: Runtime {SPM} Management for
Memory-Intensive Applications in Embedded Many-Cores",
journal = j-TECS,
volume = "16",
number = "1",
pages = "25:1--25:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2968447",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Distributed Scratchpad Memories (SPMs) in embedded
many-core systems require careful selection of data
placement to achieve good performance. Applications
mapped to these platforms have varying memory
requirements based on their runtime behavior, resulting
in under- or overutilization of the local SPMs. We
propose SPMPool to share the available on-chip SPMs on
many-cores among concurrently executing applications in
order to reduce the overall memory access latency. By
pooling SPM resources, we can assign underutilized
memory resources, due to idle cores or low memory
usage, to applications dynamically. SPMPool is the
first workload-aware SPM mapping solution for
many-cores that dynamically allocates data at
runtime---using profiled data---to address the
unpredictable set of concurrently executing
applications. Our experiments on workloads with varying
interapplication memory intensity show that SPMPool can
achieve up to 76\% reduction in memory access latency
for configurations ranging from 16 to 256 cores,
compared to the traditional approach that limits
executing cores to use their local SPMs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Schurmans:2016:FAE,
author = "Stefan Sch{\"u}rmans and Gereon Onnebrink and Rainer
Leupers and Gerd Ascheid and Xiaotao Chen",
title = "Frequency-Aware {ESL} Power Estimation for {ARM
Cortex-A9} Using a Black Box Processor Model",
journal = j-TECS,
volume = "16",
number = "1",
pages = "26:1--26:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2987375",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Power estimation has become a strongly desired feature
in Electronic System Level (ESL) simulations. Most
existing power estimation approaches for this
abstraction level require component models with
observable internals. However, most ESL models of
modern processors are delivered as black box
components. This work presents a tool-based ESL power
estimation methodology for black box models and its
extension for multiple clock frequencies. The
evaluation uses hardware measurements of the ARM
Cortex-A9 subsystem of the OMAP4460 chip for reference.
The achieved estimation error is 5\% on average for
fixed-frequency power models and 7\% for multifrequency
power models.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Weinstock:2016:PSS,
author = "Jan Henrik Weinstock and Luis Gabriel Murillo and
Rainer Leupers and Gerd Ascheid",
title = "Parallel {SystemC} Simulation for {ESL} Design",
journal = j-TECS,
volume = "16",
number = "1",
pages = "27:1--27:??",
month = nov,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2987374",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Nov 3 16:48:38 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Virtual platforms have become essential tools for the
design of embedded systems. Developers rely on them for
design space exploration and software debugging.
However, with rising HW/SW complexity and the need to
simulate more and more processors simultaneously, the
performance of virtual platforms degrades rapidly.
Parallel simulation techniques can help to counter this
by leveraging multicore PCs, which are widely available
today. This work presents a novel parallel simulation
approach that is targeted toward acceleration of
virtual platforms from the ESL domain. By trading some
timing accuracy, multiprocessor virtual platforms can
be accelerated by up to $ 3.4 \times $ on regular
quad-core workstations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
remark = "Special issue on VIPES, special issue on ICESS2015 and
regular papers.",
}
@Article{Shukla:2017:ECC,
author = "Sandeep K. Shukla",
title = "Editorial: Continuing the Course",
journal = j-TECS,
volume = "16",
number = "2",
pages = "28:1--28:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3043965",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fischmeister:2017:GES,
author = "Sebastian Fischmeister and Jason Xue",
title = "Guest Editorial: Special Issue on {LCTES 2015}",
journal = j-TECS,
volume = "16",
number = "2",
pages = "29:1--29:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3041038",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "29",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cunha:2017:DSC,
author = "Marcos Aur{\'e}lio Pinto Cunha and Omayma Matoussi and
Fr{\'e}d{\'e}ric P{\'e}trot",
title = "Detecting Software Cache Coherence Violations in
{MPSoC} Using Traces Captured on Virtual Platforms",
journal = j-TECS,
volume = "16",
number = "2",
pages = "30:1--30:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2990193",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Software cache coherence schemes tend to be the
solution of choice in dedicated multi/many core systems
on chip, as they make the hardware much simpler and
predictable. However, despite the developers' effort,
it is hard to make sure that all preventive
measurements are taken to ensure coherence. In this
work, we propose a method to identify the potential
cache coherence violations using traces obtained from
virtual platforms. These traces contain causality
relations among events, which allow first to simplify
the analysis, and second to avoid relying on
timestamps. Our method identifies potential violations
that may occur during a given execution for
write-through and write-back cache policies. Therefore,
it is independent of the software coherence protocol.
We conducted experiments on parallel applications
running on a lightweight SMP operating system, and we
were able to detect coherence issues that we could then
solve.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zheng:2017:DDC,
author = "Wenguang Zheng and Hui Wu",
title = "Dynamic Data-Cache Locking for Minimizing the {WCET}
of a Single Task",
journal = j-TECS,
volume = "16",
number = "2",
pages = "31:1--31:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2994602",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Caches have been widely used in modern embedded
processors to bridge the increasing speed gap between
processors and off-chip memory. In real-time embedded
systems, computing the Worst-Case Execution Time (WCET)
of a task is essential for the task scheduler to
construct a valid schedule for a task set.
Unfortunately, caches make it much harder to compute
the WCET of a task. Cache locking has been proposed to
alleviate the timing unpredictability problem caused by
caches. In this article, we investigate the following
WCET-aware data-cache locking problem for a single
task. Given a task, select a set of variables as locked
cache contents such that the WCET of the task is
minimized. We propose two dynamic full cache-locking
approaches. The first formulates the problem as a
global Integer Linear Programming (ILP) problem that
simultaneously selects a minimum set of memory blocks
of variables as locked cache contents and allocates
them to the data cache. The second iteratively
constructs a subgraph of the Control Flow Graph (CFG)
of the task in which the lengths of all the paths are
close to the longest path length, uses an ILP
formulation to select a minimum set of memory blocks of
variables in the subgraph as locked cache contents, and
allocates the selected memory blocks to the data cache.
We also propose two novel, efficient data-cache
allocation algorithms for the global ILP approach and
the iterative ILP approach, respectively. We have
implemented both approaches and compared them with two
state-of-the-art approaches, the longest path-based
dynamic cache-locking approach and the static WCET
analysis approach without cache locking by using a set
of benchmarks from the M{\"a}lardalen WCET benchmark
suite, SNU real-time benchmarks, and Powerstone
benchmarks. Compared to the static WCET analysis
approach, the average WCET improvements of the first
approach range between 11.4\% and 26.4\%. Compared to
the longest path--based, dynamic cache-locking
approach, the average WCET improvements of the first
approach range between 5.0\% and 15.4\%. The second
approach performs slightly better than the first
approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "31",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:CDS,
author = "Qingrui Liu and Changhee Jung and Dongyoon Lee and
Devesh Tiwari",
title = "Compiler-Directed Soft Error Detection and Recovery to
Avoid {DUE} and {SDC} via {Tail-DMR}",
journal = j-TECS,
volume = "16",
number = "2",
pages = "32:1--32:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2930667",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents Clover, a compiler-directed soft
error detection and recovery scheme for lightweight
soft error resilience. The compiler carefully generates
soft-error-tolerant code based on idempotent processing
without explicit checkpoints. During program execution,
Clover relies on a small number of acoustic wave
detectors deployed in the processor to identify soft
errors by sensing the wave made by a particle strike.
To cope with DUEs (detected unrecoverable errors)
caused by the sensing latency of error detection,
Clover leverages a novel selective instruction
duplication technique called tail-DMR (dual modular
redundancy) that provides a region-level error
containment. Once a soft error is detected by either
the sensors or the tail-DMR, Clover takes care of the
error as in the case of exception handling. To recover
from the error, Clover simply redirects program control
to the beginning of the code region where the error is
detected. The experimental results demonstrate that the
average runtime overhead is only 26\%, which is a 75\%
reduction compared to that of the state-of-the-art soft
error resilience technique. In addition, this article
evaluates an alternative technique called tail-wait,
comparing it to Clover. According to the evaluation
with the different processor configurations and the
various error detection latencies, Clover turns out to
be a superior technique, achieving 1.06 to 3.49 $
\times $ speedup over the tail-wait.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "32",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Procter:2017:PAS,
author = "Adam Procter and William L. Harrison and Ian Graves
and Michela Becchi and Gerard Allwein",
title = "A Principled Approach to Secure Multi-core Processor
Design with {ReWire}",
journal = j-TECS,
volume = "16",
number = "2",
pages = "33:1--33:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2967497",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "There is no such thing as high assurance without high
assurance hardware. High assurance hardware is
essential because any and all high assurance systems
ultimately depend on hardware that conforms to, and
does not undermine, critical system properties and
invariants. And yet, high assurance hardware
development is stymied by the conceptual gap between
formal methods and hardware description languages used
by engineers. This article advocates a
semantics-directed approach to bridge this conceptual
gap. We present a case study in the design of secure
processors, which are formally derived via principled
techniques grounded in functional programming and
equational reasoning. The case study comprises the
development of secure single- and dual-core variants of
a single processor, both based on a common semantic
specification of the ISA. We demonstrate via formal
equational reasoning that the dual-core processor
respects a ``no-write-down'' information flow policy.
The semantics-directed approach enables a modular and
extensible style of system design and verification. The
secure processors require only a very small amount of
additional code to specify and implement, and their
security verification arguments are concise and
readable. Our approach rests critically on ReWire, a
functional programming language providing a suitable
foundation for formal verification of hardware designs.
This case study demonstrates both ReWire's
expressiveness as a programming language and its power
as a framework for formal, high-level reasoning about
hardware systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2017:ESS,
author = "Li-Pin Chang and Po-Han Sung and Po-Tsang Chen and
Po-Hung Chen",
title = "Eager Synching: a Selective Logging Strategy for Fast
{\tt fsync()} on Flash-Based {Android} Devices",
journal = j-TECS,
volume = "16",
number = "2",
pages = "34:1--34:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2930668",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Flash storage has been a standard component in Android
devices. Recent research has reported that application
data management in Android involves frequent fsync()
operations. The current fsync() implementations,
including those of ext4 and F2FS, have several common
drawbacks. Specifically, ext4 commits a transaction
every time to sync a file, whereas F2FS commits a
checkpoint to sync a directory. Committing a
transaction or checkpoint flushes all dirty data from
the page cache to the flash storage via many small,
random block write requests. The resultant high I/O
frequency and excessive write traffic cause a high
fsync() latency. This study presents an efficient
fsync() method, called eager synching, which is based
on a simple idea: write less, and write sequentially.
To sync a file, eager synching writes only a subset of
all dirty data in the page cache to a sequential log
space using a few sequential block write requests. It
does not involve transaction or checkpoint committing.
We successfully implemented eager synching in ext4 and
F2FS, and our experimental results show that, compared
with the original fsync() methods of ext4 and F2FS,
eager synching reduced the average and maximum fsync()
latencies by up to 72\% and 91\%, respectively,
block-level write traffic by up to 35\%, and I/O
frequency by up to 66\%. Through enhanced crash
recovery procedures, eager synching can successfully
recover all previously synched files while still
guaranteeing the file system integrity. We also
conducted live application replays using the proposed
eager synching approach and observed that this approach
significantly improved the application frame updating
rate and application execution time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dietrich:2017:GOF,
author = "Christian Dietrich and Martin Hoffmann and Daniel
Lohmann",
title = "Global Optimization of Fixed-Priority Real-Time
Systems by {RTOS}-Aware Control-Flow Analysis",
journal = j-TECS,
volume = "16",
number = "2",
pages = "35:1--35:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950053",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cyber--physical systems typically target a dedicated
purpose; their embedded real-time control system, such
as an automotive control unit, is designed with a
well-defined set of functionalities. On the software
side, this results in a large amount of implicit and
explicit static knowledge about the system and its
behavior already at compile time. Compilers have become
increasingly better at extracting and exploiting such
static knowledge. For instance, many optimizations have
been lifted up to the interprocedural or even to the
whole-program level. However, whole-program
optimizations generally stop at the application--kernel
boundary: control-flow transitions between different
threads are not yet analyzed. In this article, we cross
the application--kernel boundary by combining the
semantics of a real-time operating system (RTOS) with
deterministic fixed-priority scheduling (e.g.,
OSEK/AUTOSAR, ARINC 653, $ \mu $ITRON, POSIX.4) and the
explicit application knowledge to enable system-wide,
flow-sensitive compiler optimizations. We present two
methods to extract a cross-kernel, control-flow--graph
that provides a global view on all possible execution
paths of a real-time system. Having this knowledge at
hand, we tailor the operating system kernel more
closely to the particular application scenario. For the
example of a real-world safety-critical control system,
we present three possible use cases. (1) Runtime
optimizations, by means of specialized system calls for
each call site, allow one to speed up the kernel execution
path by 28\% in our benchmark scenario. Furthermore, we
target transient hardware fault tolerance with two
automated software-based countermeasures: (2)
generation of OS state assertions on the expected
system behavior, and (3) a system-wide dominator-region
based control-flow error detection, both of which
leverage significant robustness improvements.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "35",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:MCS,
author = "Jing Liu and Kenli Li and Dakai Zhu and Jianjun Han
and Keqin Li",
title = "Minimizing Cost of Scheduling Tasks on Heterogeneous
Multicore Embedded Systems",
journal = j-TECS,
volume = "16",
number = "2",
pages = "36:1--36:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2935749",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cost savings are very critical in modern heterogeneous
computing systems, especially in embedded systems. Task
scheduling plays an important role in cost savings. In
this article, we tackle the problem of scheduling tasks
on heterogeneous multicore embedded systems with the
constraints of time and resources for minimizing the
total cost, while considering the communication
overhead. This problem is NP-hard and we propose
several heuristic techniques---ISGG, RLD, and RLDG---to
address the problem. Experimental results show that the
proposed algorithms significantly outperform the
existing approaches in terms of cost savings.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Llopard:2017:FPA,
author = "Ivan Llopard and Christian Fabre and Albert Cohen",
title = "From a Formalized Parallel Action Language to Its
Efficient Code Generation",
journal = j-TECS,
volume = "16",
number = "2",
pages = "37:1--37:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2990195",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modeling languages propose convenient abstractions and
transformations to handle the complexity of today's
embedded systems. Based on the formalism of the
Hierarchical State Machine, they enable the expression
of hierarchical control parallelism. However, they face
two important challenges when it comes to modeling
data-intensive applications: no unified approach that
also accounts for data-parallel actions and no
effective code optimization and generation flows. We
propose a modeling language extended with parallel
action semantics and hierarchical indexed-state
machines suitable for computationally intensive
applications. Together with its formal semantics, we
present an optimizing model compiler aiming for the
generation of efficient data-parallel
implementations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Manilov:2017:FRS,
author = "Stanislav Manilov and Bj{\"o}rn Franke and Anthony
Magrath and Cedric Andrieu",
title = "{Free Rider}: a Source-Level Transformation Tool for
Retargeting Platform-Specific Intrinsic Functions",
journal = j-TECS,
volume = "16",
number = "2",
pages = "38:1--38:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2990194",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Short-vector SIMD and DSP instructions are popular
extensions to common ISAs. These extensions deliver
excellent performance and compact code for some
compute-intensive applications, but they require
specialized compiler support. To enable the programmer
to explicitly request the use of such an instruction,
many C compilers provide platform-specific intrinsic
functions, whose implementation is handled specially by
the compiler. The use of such intrinsics, however,
inevitably results in nonportable code. In this
article, we develop a novel methodology for retargeting
such nonportable code, which maps intrinsics from one
platform to another, taking advantage of similar
intrinsics on the target platform. We employ a
description language to specify the signature and
semantics of intrinsics and perform graph-based pattern
matching and high-level code transformations to derive
optimized implementations exploiting the target's
intrinsics, wherever possible. We demonstrate the
effectiveness of our new methodology, implemented in
the Free Rider tool, by automatically retargeting
benchmarks derived from OpenCV samples and a complex
embedded application optimized to run on an ARM
Cortex-M4 to an Intel Edison module with SSE4.2
instructions (and vice versa). We achieve a speedup of
up to 3.73 over a plain C baseline, and on average
96.0\% of the speedup of manually ported and optimized
versions of the benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zeng:2017:SLD,
author = "Jing Zeng and Laurence T. Yang and Man Lin and Zili
Shao and Dakai Zhu",
title = "System-Level Design Optimization for Security-Critical
Cyber-Physical-Social Systems",
journal = j-TECS,
volume = "16",
number = "2",
pages = "39:1--39:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2925991",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cyber-physical-social systems (CPSS), an emerging
computing paradigm, have attracted intensive attentions
from the research community and industry. We are facing
various challenges in designing secure, reliable, and
user-satisfied CPSS. In this article, we consider these
design issues as a whole and propose a system-level
design optimization framework for CPSS design where
energy consumption, security-level, and user
satisfaction requirements can be fulfilled while
satisfying constraints for system reliability.
Specifically, we model the constraints (energy
efficiency, security, and reliability) as the penalty
functions to be incorporated into the corresponding
objective functions for the optimization problem. A
smart office application is presented to demonstrate
the feasibility and effectiveness of our proposed
design optimization approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Haar:2017:MGE,
author = "Stefan Haar and Roland Meyer",
title = "Message from the {Guest Editors}",
journal = j-TECS,
volume = "16",
number = "2",
pages = "40:1--40:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3037413",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bujtor:2017:TPD,
author = "Ferenc Bujtor and Lev Sorokin and Walter Vogler",
title = "Testing Preorders for {dMTS}: Deadlock- and the New
{Deadlock-\slash Divergence Testing}",
journal = j-TECS,
volume = "16",
number = "2",
pages = "41:1--41:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2984641",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Testing preorders on component specifications ensure
that replacing a specification by a refined one does
not introduce unwanted behavior in an overall system.
Considering deadlocks as unwanted, the preorder can be
characterized by a failure semantics on Labeled
Transition Systems (LTSs). In previous work, we have
generalized this to Modal Transition Systems (MTSs)
with a new, MTS-specific testing idea. In the present
article, we generalize this idea further to dMTS, a
subclass of disjunctive MTSs. On the one hand, the
testing preorder can be characterized by the same
failure semantics, and dMTS have no additional
expressivity in our setting. On the other hand, the
technical treatment is significantly harder and,
surprisingly, the preorder is not compositional.
Furthermore, we regard deadlocks and divergence
(infinite unobservable runs) as unwanted and
characterize the testing preorder with an unusual
failure-divergence semantics. This preorder is already
on LTSs strictly coarser---and hence arguably better---than
the traditional failure-divergence preorder. It is a
precongruence on dMTS, also for hiding, and much easier
to handle than the deadlock-based preorder. It arises
as well from a new variant of De Nicola's and
Hennessy's must-testing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Vijzelaar:2017:MVS,
author = "Stefan Vijzelaar and Wan Fokkink",
title = "Multi-valued Simulation and Abstraction Using Lattice
Operations",
journal = j-TECS,
volume = "16",
number = "2",
pages = "42:1--42:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3012282",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Abstractions can cause spurious results, which need to
be verified in the concrete system to gain conclusive
results. Verification based on a multi-valued logic can
distinguish between conclusive and inconclusive
results, provides increased precision, and allows for
encoding additional information into the model. To
ensure a correct abstraction, one can use a mixed
simulation [Meller et al. 2009]. We extend mixed
simulation to include inconsistent values, thereby
resolving an asymmetry and allowing for abstractions
with increased precision when inconsistent values are
available. In addition, we present a set of abstraction
rules, compatible with the extended notion, for
constructing abstract models.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Andre:2017:PPO,
author = "{\'E}tienne Andr{\'e} and Thomas Chatain and C{\'e}sar
Rodr{\'\i}guez",
title = "Preserving Partial-Order Runs in Parametric Time
{Petri} Nets",
journal = j-TECS,
volume = "16",
number = "2",
pages = "43:1--43:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3012283",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Parameter synthesis for timed systems aims at deriving
parameter valuations satisfying a given property. In
this article, we target concurrent systems. We use
partial-order semantics for parametric time Petri nets
as a way to both cope with the well-known state-space
explosion due to concurrency and significantly enhance
the result of an existing synthesis algorithm. Given a
reference parameter valuation, our approach synthesizes
other valuations preserving the partial-order
executions of the reference parameter valuation. We
show the applicability of our approach using a tool
applied to asynchronous circuits.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Berard:2017:NIP,
author = "B{\'e}atrice B{\'e}rard and Lo{\"\i}c H{\'e}lou{\"e}t
and John Mullins",
title = "Non-interference in Partial Order Models",
journal = j-TECS,
volume = "16",
number = "2",
pages = "44:1--44:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2984639",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Non-interference (NI) is a property of systems stating
that confidential actions should not cause effects
observable by unauthorized users. Several variants of
NI have been studied for many types of models but
rarely for true concurrency or unbounded models. This
work investigates NI for High-level Message Sequence
Charts (HMSCs), a scenario language for the description
of distributed systems, based on composition of partial
orders. We first propose a general definition of
security properties in terms of equivalence among
observations of behaviors. Observations are naturally
captured by partial order automata, a formalism that
generalizes HMSCs and permits assembling partial
orders. We show that equivalence or inclusion
properties for HMSCs (and hence for partial order
automata) are undecidable, which means in particular
that NI is undecidable for HMSCs. We hence consider
decidable subclasses of partial order automata and
HMSCs. Finally, we define weaker local properties,
describing situations where a system is attacked by a
single agent, and show that local NI is decidable. We
then refine local NI to a finer notion of causal NI
that emphasizes causal dependencies between
confidential actions and observations and extend it to
causal NI with (selective) declassification of
confidential events. Checking whether a system
satisfies local and causal NI and their declassified
variants are PSPACE-complete problems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Saarikivi:2017:MTS,
author = "Olli Saarikivi and Hern{\'a}n Ponce-De-Le{\'o}n and
Kari K{\"a}hk{\"o}nen and Keijo Heljanko and Javier
Esparza",
title = "Minimizing Test Suites with Unfoldings of
Multithreaded Programs",
journal = j-TECS,
volume = "16",
number = "2",
pages = "45:1--45:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3012281",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article focuses on computing minimal test suites
for multithreaded programs. Based on previous work on
test case generation for multithreaded programs using
unfoldings, this article shows how this unfolding can
be used to generate minimal test suites covering all
local states of the program. Generating such minimal
test suites is shown to be NP-complete in the size of
the unfolding. We propose an SMT encoding for this
problem and two methods based on heuristics which only
approximate the solution, but scale better in practice.
Finally, we apply our methods to compute the minimal
test suites for several benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Valmari:2017:SIS,
author = "Antti Valmari",
title = "Stop It, and Be Stubborn!",
journal = j-TECS,
volume = "16",
number = "2",
pages = "46:1--46:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3012279",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This publication discusses how automatic verification
of concurrent systems can be made more efficient by
focusing on always may-terminating systems. First,
making a system always may-terminating is a method for
meeting a modelling need that exists independently of
this publication. It is illustrated that without doing
so, non-progress errors may be lost. Second, state
explosion is often alleviated with stubborn, ample, and
persistent set methods. They use expensive cycle or
terminal strong component conditions in many cases. It
is proven that for many important classes of
properties, if the systems are always may-terminating,
then these conditions can be left out.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Isenberg:2017:IIV,
author = "Tobias Isenberg",
title = "Incremental Inductive Verification of Parameterized
Timed Systems",
journal = j-TECS,
volume = "16",
number = "2",
pages = "47:1--47:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2984640",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We propose and extend an approach for the verification
of safety properties for parameterized timed systems
modeled as networks of timed automata. For this task,
we introduce an incremental workflow that is based on
our algorithm IC3 with Zones. It proceeds in a cycle in
which single models of the system are verified, and the
verification results are employed for the reasoning
about the entire system. Starting with the smallest
instances, the verification of the safety property is
carried out fast and efficient. On successful
verification, the algorithm produces an inductive
strengthening of the safety property. We reuse this
result and try to reason about the entire parameterized
timed system. To this end, we extrapolate the inductive
strengthening into a candidate for the next-larger
model. In case this candidate is a valid inductive
strengthening for the next larger model, our main
theorem reasons about all models of the parameterized
timed system, stating that the safety property holds
true for all models. Otherwise, the main cycle starts
over with the verification of the next larger model.
This workflow is iterated indefinitely, until able to
reason about the entire parameterized timed system,
until a counterexample trace is found, or until the
single models become too large to be handled in the
verification. We reuse the intermediate results in a
Feedback-loop in order to accelerate the verification
runs for the single models. Furthermore, we consider an
extended formalism in comparison to our previous
publications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "47",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Siirtola:2017:WDW,
author = "Antti Siirtola and Stavros Tripakis and Keijo
Heljanko",
title = "When Do We Not Need Complex Assume-Guarantee Rules?",
journal = j-TECS,
volume = "16",
number = "2",
pages = "48:1--48:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3012280",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We study the need for complex circular
assume-guarantee (AG) rules in formalisms that already
provide the simple precongruence rule. We first
investigate the question for two popular formalisms:
Labeled Transition Systems (LTSs) with weak simulation
and Interface Automata (IA) with alternating
simulation. We observe that, in LTSs, complex circular
AG rules cannot always be avoided, but, in the IA
world, the simple precongruence rule is all we need.
Based on these findings, we introduce modal IA with cut
states, a novel formalism that not only generalizes IA
and LTSs but also allows for compositional reasoning
without complex AG rules.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tang:2017:TFC,
author = "Qi Tang and Twan Basten and Marc Geilen and Sander
Stuijk and Ji-Bo Wei",
title = "{Task-FIFO} Co-Scheduling of Streaming Applications on
{MPSoCs} with Predictable Memory Hierarchy",
journal = j-TECS,
volume = "16",
number = "2",
pages = "49:1--49:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3038484",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article studies the scheduling of real-time
streaming applications on multiprocessor
systems-on-chips with predictable memory hierarchy. An
iteration-based task-FIFO co-scheduling framework is
proposed for this problem. We obtain FIFO size
distributions using Pareto space searching, based on
which the task-to-processor mapping is obtained with
the potential FIFO allocation being taken into account;
then, the FIFO-to-memory allocation is optimized to
minimize the total memory access cost; finally, a
self-timed throughput analysis method that considers
memory and direct memory access controller contention
is utilized to analyze the throughput. Our methods are
validated by a set of synthesized and practical
applications on different platforms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "49",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Choo:2017:EDF,
author = "Kim-Kwang Raymond Choo and Yunsi Fei and Yang Xiang
and Yu Yu",
title = "Embedded Device Forensics and Security",
journal = j-TECS,
volume = "16",
number = "2",
pages = "50:1--50:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3015662",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "While the increasing digitalization of our society and
amalgamation of embedded devices into the
ever-increasing facets of our daily life (e.g., in
smart and intelligent vehicles, smart cities and smart
nations, and critical infrastructure sectors) have
resulted in improved productivity and quality of life,
the trend has also resulted in a trend of increasing
frequency and sophistication of cyber exploitation and
cyber threats. Hence, there is a need for coordinated
efforts from the research community to address
resulting concerns using both cryptographic and
non-cryptographic solutions, such as those presented in
this special section.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "50",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Puthal:2017:DDK,
author = "Deepak Puthal and Surya Nepal and Rajiv Ranjan and
Jinjun Chen",
title = "{DLSeF}: a Dynamic Key-Length-Based Efficient
Real-Time Security Verification Model for Big Data
Stream",
journal = j-TECS,
volume = "16",
number = "2",
pages = "51:1--51:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2937755",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Applications in risk-critical domains such as
emergency management and industrial control systems
need near-real-time stream data processing in
large-scale sensing networks. The key problem is how to
ensure online end-to-end security (e.g.,
confidentiality, integrity, and authenticity) of data
streams for such applications. We refer to this as an
online security verification problem. Existing data
security solutions cannot be applied in such
applications as they cannot deal with data streams with
high-volume and high-velocity data in real time. They
introduce a significant buffering delay during security
verification, resulting in a requirement for a large
buffer size for the stream processing server. To
address this problem, we propose a Dynamic
Key-Length-Based Security Framework (DLSeF) based on a
shared key derived from synchronized prime numbers; the
key is dynamically updated at short intervals to thwart
potential attacks to ensure end-to-end security.
Theoretical analyses and experimental results of the
DLSeF framework show that it can significantly improve
the efficiency of processing stream data by reducing
the security verification time and buffer usage without
compromising security.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "51",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Song:2017:SSI,
author = "Jun Song and Fan Yang and Kim-Kwang Raymond Choo and
Zhijian Zhuang and Lizhe Wang",
title = "{SIPF}: a Secure Installment Payment Framework for
Drive-Thru {Internet}",
journal = j-TECS,
volume = "16",
number = "2",
pages = "52:1--52:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3014584",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Ensuring the security and privacy of vehicular ad hoc
networks (VANETs) and related services such as secure
payment has been the focus of recent research efforts.
Existing secure payment solutions generally require
stable and reliable network connection. This is,
however, a challenge in a VANET setting. Drive-thru
Internet, a secure payment solution for VANETs,
involves a great number of fast-moving vehicles
competing for connections/communications
simultaneously. Thus, service providers may find it
challenging to provide real-time payment services or
may have to sacrifice the confidentiality and the
authenticity of payment vouchers for usability. In this
article, we propose a secure installment payment
framework for drive-thru Internet deployment in a VANET
setting. The framework also provides the capability to
embody properties such as confidentiality of payment
vouchers, offline signature verification, periodical
reconciliation, and installment payment. Performance
evaluation and security analysis demonstrate the
utility of the framework in a VANET setting.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "52",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:EEC,
author = "Zhe Liu and Jian Weng and Zhi Hu and Hwajeong Seo",
title = "Efficient Elliptic Curve Cryptography for Embedded
Devices",
journal = j-TECS,
volume = "16",
number = "2",
pages = "53:1--53:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2967103",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many resource-constrained embedded devices, such as
wireless sensor nodes, require public key encryption or
a digital signature, which has induced plenty of
research on efficient and secure implementation of
elliptic curve cryptography (ECC) on 8-bit processors.
In this work, we study the suitability of a special
class of finite fields, called optimal prime fields
(OPFs), for a ``lightweight'' ECC implementation with a
view toward high performance and security. First, we
introduce a highly optimized arithmetic library for
OPFs that includes two implementations for each finite
field arithmetic operation, namely a
performance-optimized version and a security-optimized
variant. The latter is resistant against simple power
analysis attacks in the sense that it always executes
the same sequence of instructions, independent of the
operands. Based on this OPF library, we then describe a
performance-optimized and a security-optimized
implementation of scalar multiplication on the elliptic
curve over OPFs at several security levels. The former
uses the Gallant-Lambert-Vanstone method on twisted
Edwards curves and reaches an execution time of 3.14M
cycles (over a 160-bit OPF) on an 8-bit ATmega128
processor, whereas the latter is based on a Montgomery
curve and executes in 5.53M cycles.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "53",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fu:2017:DFA,
author = "Shan Fu and Guoai Xu and Juan Pan and Zongyue Wang and
An Wang",
title = "Differential Fault Attack on {ITUbee} Block Cipher",
journal = j-TECS,
volume = "16",
number = "2",
pages = "54:1--54:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2967610",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Differential Fault Attack (DFA) is a powerful
cryptanalytic technique to retrieve secret keys by
exploiting the faulty ciphertexts generated during
encryption procedure. This article proposes a novel DFA
attack that is effective on ITUbee, a software-oriented
block cipher for resource-constrained devices.
Different from other DFA, our attack makes use of not
only faulty values, but also differences between
fault-free intermediate values corresponding to 2
plaintexts, which combine traditional differential
analysis with DFA. The possible injection positions
with different number of faults are discussed. The most
efficient attack takes 2$^{25}$ round function
operations with 4 faults, which is achieved in a few
seconds on a PC.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "54",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2017:RNF,
author = "Yang Li and Mengting Chen and Zhe Liu and Jian Wang",
title = "Reduction in the Number of Fault Injections for Blind
Fault Attack on {SPN} Block Ciphers",
journal = j-TECS,
volume = "16",
number = "2",
pages = "55:1--55:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3014583",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In 2014, a new fault analysis called blind fault
attack (BFA) was proposed, in which attackers can only
obtain the number of different faulty outputs without
knowing the public data. The original BFA requires
480,000 fault injections to recover a 128-bit AES key.
This work attempts to reduce the number of fault
injections under the same attack assumptions. We
analyze BFA from an information theoretical perspective
and introduce a new probability-based distinguisher.
Three approaches are proposed for different attack
scenarios. The best one realized a 66.8\% reduction of
the number of fault injections on AES.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "55",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Castiglione:2017:BFI,
author = "Arcangelo Castiglione and Raffaele Pizzolante and
Francesco Palmieri and Barbara Masucci and Bruno
Carpentieri and Alfredo {De Santis} and Aniello
Castiglione",
title = "On-Board Format-Independent Security of Functional
Magnetic Resonance Images",
journal = j-TECS,
volume = "16",
number = "2",
pages = "56:1--56:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2893474",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Functional magnetic resonance imaging (fMRI) provides
an effective and noninvasive tool for researchers to
understand cerebral functions and correlate them with
brain activities. In addition, with the ever-increasing
diffusion of the Internet, such images may be exchanged
in several ways, allowing new research and medical
services. On the other hand, ensuring the security of
exchanged fMRI data becomes a main concern due to their
special characteristics arising from strict ethics and
legislative and diagnostic implications. Again, the
risks increase when dealing with open environments like
the Internet. For this reason, security mechanisms that
ensure protection of such data are strongly required.
However, we remark that the mechanisms commonly
employed for data protection are doomed to fail when
dealing with imaging data. In this article, we propose
a novel watermarking scheme explicitly addressed for
this type of imaging. Such a scheme can be used for
several purposes, particularly to ensure authenticity
and integrity. Moreover, we show how to integrate our
scheme within commercial off-the-shelf fMRI system.
Finally, the validity and the efficiency of our scheme
has been assessed through testing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "56",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:PMH,
author = "Jianghua Liu and Jinhua Ma and Wei Wu and Xiaofeng
Chen and Xinyi Huang and Li Xu",
title = "Protecting Mobile Health Records in Cloud Computing: a
Secure, Efficient, and Anonymous Design",
journal = j-TECS,
volume = "16",
number = "2",
pages = "57:1--57:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2983625",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Electronic healthcare (eHealth) systems have replaced
traditional paper-based medical systems due to
attractive features such as universal accessibility,
high accuracy, and low cost. As a major constituent
part of eHealth systems, mobile healthcare (mHealth)
applies Mobile Internet Devices (MIDs) and Embedded
Devices (EDs), such as tablets, smartphones, and other
devices embedded in the bodies of individuals, to
improve the quality of life and provide more convenient
healthcare services for patients. Unfortunately, MIDs
and EDs have only limited computational capacity,
storage space, and power supply. By taking this into
account, we present a new design to guarantee the
integrity of eHealth records and the anonymity of the
data owner in a more efficient and flexible way. The
essence of our design is a general method which can
convert any secure Attribute-Based Signature (ABS)
scheme into a highly efficient and secure
Online/Offline Attribute-Based Signature (OOABS)
scheme. We prove the security and analyze the
efficiency improvement of the new design. Additionally,
we illustrate the proposed generic construction by
applying it to a specific ABS scheme.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "57",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2017:SRS,
author = "Wei Wang and Peng Xu and Laurence Tianruo Yang and
Willy Susilo and Jinjun Chen",
title = "Securely Reinforcing Synchronization for Embedded
Online Contests",
journal = j-TECS,
volume = "16",
number = "2",
pages = "58:1--58:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2899000",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "When competing in eBay bidding, online games, or
e-exams in embedded computing environments, people
naturally face asynchronous starts from different
computing devices, which is treated as a security risk
of online contests. The security risks of online
contests also include eavesdropping during data
transmission without intended rights, and false starts
by malicious competitors, which also means asynchrony
in contests. Accordingly, online contests need security
guarantees, especially on synchronization. In this
article, for synchronic and secure starts in a contest,
we update security requirements of confidentiality,
anonymity, and synchrony, comparing the current work to
our previous work. Based on the updated requirements,
we propose a general framework for the Advanced Secure
Synchronized Reading (ASSR) system, which can hold
multiple contests simultaneously in the cloud. It is
important to note that the system can ignore the
impacts of heterogeneity among competitors. Considering
the heterogeneity both on transmission and computing,
we construct a novel Randomness-reused Identity Based
Key Encapsulation Mechanism (RIBKEM) to support
separable decapsulation, which can shorten both
decryption delay and transmission delay with the best
efforts. Finally, ASSR enhances synchronization
achievement for contest starts with heterogeneous
delays of competitors while satisfying other security
requirements. As a complement, the analysis on the
provable security of ASSR is given, as well as a
further analysis on the achievement of
synchronization.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "58",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mozaffari-Kermani:2017:FDA,
author = "Mehran Mozaffari-Kermani and Reza Azarderakhsh and
Anita Aghaie",
title = "Fault Detection Architectures for Post-Quantum
Cryptographic Stateless Hash-Based Secure Signatures
Benchmarked on {ASIC}",
journal = j-TECS,
volume = "16",
number = "2",
pages = "59:1--59:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2930664",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Symmetric-key cryptography can resist the potential
post-quantum attacks expected with the not-so-faraway
advent of quantum computing power. Hash-based,
code-based, lattice-based, and multivariate-quadratic
equations are all other potential candidates, the merit
of which is that they are believed to resist both
classical and quantum computers, and applying ``Shor's
algorithm''---the quantum-computer discrete-logarithm
algorithm that breaks classical schemes---to them is
infeasible. In this article, we propose, assess, and
benchmark reliable constructions for stateless
hash-based signatures. Such architectures are believed
to be one of the prominent post-quantum schemes,
offering security proofs relative to plausible
properties of the hash function; however, it is well
known that their confidentiality does not guarantee
reliable architectures in the presence of natural and
malicious faults. We propose and benchmark fault
diagnosis methods for this post-quantum cryptography
variant through case studies for hash functions and
present the simulations and implementations results
(through application-specific integrated circuit
evaluations) to show the applicability of the presented
schemes. The proposed approaches make such hash-based
constructions more reliable against natural faults and
help protecting them against malicious faults and can
be tailored based on the resources available and for
different reliability objectives.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "59",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gai:2017:SES,
author = "Keke Gai and Longfei Qiu and Min Chen and Hui Zhao and
Meikang Qiu",
title = "{SA-EAST}: Security-Aware Efficient Data Transmission
for {ITS} in Mobile Heterogeneous Cloud Computing",
journal = j-TECS,
volume = "16",
number = "2",
pages = "60:1--60:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2979677",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The expected advanced network explorations and the
growing demand for mobile data sharing and transferring
have driven numerous novel applications in
Cyber-Physical Systems (CPSs), such as Intelligent
Transportation Systems (ITSs). However, current ITS
implementations are restricted by the conflicts between
security and communication efficiency. Focusing on this
issue, this article proposes a Security-Aware Efficient
Data Sharing and Transferring (SA-EAST) model, which is
designed for securing cloud-based ITS implementations.
In applying this approach, we aim to obtain secure
real-time multimedia data sharing and transferring. Our
experimental evaluation has shown that our proposed
model provides an effective performance in securing
communications for ITS.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "60",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shu:2017:WDD,
author = "Junliang Shu and Yuanyuan Zhang and Juanru Li and
Bodong Li and Dawu Gu",
title = "Why Data Deletion Fails? {A} Study on Deletion Flaws
and Data Remanence in {Android} Systems",
journal = j-TECS,
volume = "16",
number = "2",
pages = "61:1--61:??",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3007211",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Smart mobile devices are becoming the main vessel of
personal privacy information. While they carry valuable
information, data erasure is somehow much more
vulnerable than was predicted. The security mechanisms
provided by the Android system are not flexible enough
to thoroughly delete sensitive data. In addition to the
weakness among several provided data-erasing and
file-deleting mechanisms, we also target the Android OS
design flaws in data erasure, and unveil that the
design of the Android OS contradicts some secure
data-erasure demands. We present the data-erasure flaws
in three typical scenarios on mainstream Android
devices, such as the data clearing flaw, application
uninstallation flaw, and factory reset flaw. Some of
these flaws are inherited data-deleting security issues
from the Linux kernel, and some are new vulnerabilities
in the Android system. Those scenarios reveal the data
leak points in Android systems. Moreover, we reveal
that the data remanence on the disk is rarely affected
by the user's daily operation, such as file deletion
and app installation and uninstallation, by a
real-world data deletion latency experiment. After one
volunteer used the Android phone for 2 months, the data
remanence amount was still considerable. Then, we
proposed DataRaider for file recovering from disk
fragments. It adopts a file-carving technique and is
implemented as an automated sensitive information
recovering framework. DataRaider is able to extract
private data in a raw disk image without any file
system information, and the recovery rate is
considerably high in the four test Android phones. We
propose some mitigation for data remanence issues, and
give the users some suggestions on data protection in
Android systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "61",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2017:ECS,
author = "Sandeep K. Shukla",
title = "Editorial: Cyber Security, {IoT}, Block Chains---Risks
and Opportunities",
journal = j-TECS,
volume = "16",
number = "3",
pages = "62:1--62:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3087913",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "62",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wolf:2017:GES,
author = "Marilyn Wolf and Jason Xue",
title = "Guest Editorial: Special Issue on Embedded Computing
for {IoT}",
journal = j-TECS,
volume = "16",
number = "3",
pages = "63:1--63:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3065713",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "63",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ateniese:2017:LCS,
author = "Giuseppe Ateniese and Giuseppe Bianchi and Angelo T.
Capossele and Chiara Petrioli and Dora Spenza",
title = "Low-Cost Standard Signatures for Energy-Harvesting
Wireless Sensor Networks",
journal = j-TECS,
volume = "16",
number = "3",
pages = "64:1--64:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2994603",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This work is motivated by a general question: can
micro-scale energy-harvesting techniques be exploited
to support low-cost standard security solutions on
resource-constrained devices? We focus on guaranteeing
integrity and authentication in Internet of Things
(IoT) and Wireless Sensor Network (WSN) applications.
In this article, we propose techniques to make ECDSA
signatures low cost and implementable on
resource-constrained devices. By combining
precomputation techniques and energy-harvesting
capabilities of modern sensor nodes, we achieve
significant improvement over prior works. In addition,
we show that the cost of ECDSA signatures can be
reduced by up to a factor 10 by using harvesting-aware
optimizations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "64",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jayakumar:2017:EAM,
author = "Hrishikesh Jayakumar and Arnab Raha and Jacob R.
Stevens and Vijay Raghunathan",
title = "Energy-Aware Memory Mapping for Hybrid {FRAM--SRAM}
{MCUs} in Intermittently-Powered {IoT} Devices",
journal = j-TECS,
volume = "16",
number = "3",
pages = "65:1--65:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2983628",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Forecasts project that by 2020, there will be around
50 billion devices connected to the Internet of Things
(IoT), most of which will operate untethered and
unplugged. While environmental energy harvesting is a
promising solution to power these IoT edge devices, it
introduces new complexities due to the unreliable
nature of ambient energy sources. In the presence of an
unreliable power supply, frequent checkpointing of the
system state becomes imperative, and recent research
has proposed the concept of in-situ checkpointing by
using ferroelectric RAM (FRAM), an emerging
non-volatile memory technology, as unified memory in
these systems. Even though an entirely FRAM-based
solution provides reliability, it is energy inefficient
compared to SRAM due to the higher access latency of
FRAM. On the other hand, an entirely SRAM-based
solution is highly energy efficient but is unreliable
in the face of power loss. This paper advocates an
intermediate approach in hybrid FRAM-SRAM
microcontrollers that involves judicious memory mapping
of program sections to retain the reliability benefits
provided by FRAM while performing almost as efficiently
as an SRAM-based system. We propose an energy-aware
memory mapping technique that maps different program
sections to the hybrid FRAM-SRAM microcontroller such
that energy consumption is minimized without
sacrificing reliability. Our technique consists of
eM-map, which performs a one-time characterization to
find the optimal memory map for the functions that
constitute a program and energy-align, a novel
hardware-software technique that aligns the system's
powered-on time intervals to function execution
boundaries, which results in further improvements in
energy efficiency and performance. Experimental results
obtained using the MSP430FR5739 microcontroller
demonstrate a significant performance improvement of up
to 2x and energy reduction of up to 20\% over a
state-of-the-art FRAM-based solution. Finally, we
present a case study that shows the implementation of
our techniques in the context of a real IoT
application.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "65",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tiloca:2017:ADB,
author = "Marco Tiloca and Kirill Nikitin and Shahid Raza",
title = "{Axiom}: {DTLS}-Based Secure {IoT} Group
Communication",
journal = j-TECS,
volume = "16",
number = "3",
pages = "66:1--66:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047413",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents Axiom, a DTLS-based approach to
efficiently secure multicast group communication among
IoT-constrained devices. Axiom provides an adaptation
of the DTLS record layer, relies on key material
commonly shared among the group members, and does not
require one to perform any DTLS handshake. We made a
proof-of-concept implementation of Axiom based on the
tinyDTLS library for the Contiki OS and used it to
experimentally evaluate performance of our approach on
real IoT hardware. Results show that Axiom is
affordable on resource-constrained platforms and
performs significantly better than related alternative
approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "66",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chatterjee:2017:PBS,
author = "Urbi Chatterjee and Rajat Subhra Chakraborty and
Debdeep Mukhopadhyay",
title = "A {PUF}-Based Secure Communication Protocol for
{IoT}",
journal = j-TECS,
volume = "16",
number = "3",
pages = "67:1--67:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3005715",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Security features are of paramount importance for the
Internet of Things (IoT), and implementations are
challenging given the resource-constrained IoT setup.
We have developed a lightweight identity-based
cryptosystem suitable for IoT to enable secure
authentication and message exchange among the devices.
Our scheme employs a Physically Unclonable Function
(PUF) to generate the public identity of each device,
which is used as the public key for each device for
message encryption. We have provided formal proofs of
security in the Session Key Security and Universally
Composable Framework of the proposed protocol, which
demonstrates the resilience of the scheme against
passive and active attacks. We have demonstrated the
setup required for the protocol implementation and
shown that the proposed protocol implementation incurs
low hardware and software overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:PSSa,
author = "Anfeng Liu and Xiao Liu and Zhipeng Tang and Laurence
T. Yang and Zili Shao",
title = "Preserving Smart Sink-Location Privacy with Delay
Guaranteed Routing Scheme for {WSNs}",
journal = j-TECS,
volume = "16",
number = "3",
pages = "68:1--68:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2990500",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A Semi Random Circle routing for mobile Sink joint Ray
Routing for data (SRCRR) scheme is proposed for
preserving sink-location privacy with a delay
guaranteed. In the SRCRR scheme, the data are
directionally routed along ray paths and stored at
intermediate nodes probabilistically. The Sink moves in
a semirandom circular pattern to collect data from the
local nodes occasionally, which guarantees that the
data will be collected with an acceptable delay and
prevents attackers from predicting their locations and
movements. The experimental results indicate that the
performance of the SRCRR scheme is better than that of
the previous schemes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "68",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bennett:2017:DDS,
author = "Terrell R. Bennett and Nicholas Gans and Roozbeh
Jafari",
title = "Data-Driven Synchronization for {Internet-of-Things}
Systems",
journal = j-TECS,
volume = "16",
number = "3",
pages = "69:1--69:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2983627",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The Internet of Things (IoT) is fueled by the growth
of sensors, actuators, and services that collect and
process raw sensor data. Wearable and environmental
sensors will be a major component of the IoT and
provide context about people and activities that are
occurring. It is imperative that sensors in the IoT are
synchronized, which increases the usefulness and value
of the sensor data and allows data from multiple
sources to be combined and compared. Due to the
heterogeneous nature of sensors (e.g., synchronization
protocols, communication channels, etc.),
synchronization can be difficult. In this article, we
present novel techniques for synchronizing data from
multi-sensor environments based on the events and
interactions measured by the sensors. We present
methods to determine which interactions can likely be
used for synchronization and methods to improve
synchronization by removing erroneous synchronization
points. We validate our technique through experiments
with wearable and environmental sensors in a laboratory
environment. Experiments resulted in median drift error
reduction from 66\% to 98\% for sensors synchronized
through physical interactions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "69",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shen:2017:MQC,
author = "Zhaoyan Shen and Zhijian He and Shuai Li and Qixin
Wang and Zili Shao",
title = "A Multi-Quadcopter Cooperative Cyber-Physical System
for Timely Air Pollution Localization",
journal = j-TECS,
volume = "16",
number = "3",
pages = "70:1--70:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3005716",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We propose a cyber-physical system of unmanned
quadcopters to locate air pollution sources in a timely
manner. The system consists of a physical part and a
cyber part. The physical part includes unmanned
quadcopters equipped with multiple sensors. The cyber
part carries out control laws. We simplify the control
laws by decoupling the quadcopters' horizontal-plane
motion control from vertical motion control. To control
the quadcopter's horizontal-plane motions, we propose a
controller that combines pollutant dynamics with
quadcopter physics. To control the quadcopter's
vertical motions, we adopt an anti-windup
proportional-integral (PI) controller. We further
extend the horizontal-plane control laws from a single
quadcopter to multiple quadcopters. The
multi-quadcopter control laws are distributed and
convergent. We implement a prototype quadcopter and
carry out experiments to verify the vertical control
laws. We also carry out simulations to evaluate the
horizontal-plane control laws. With quadcopter
parameters set commensurate with our prototype
implementation's, our simulations show that the control
laws can drive quadcopters to locate pollution
source(s) in a timely way.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "70",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2017:SVA,
author = "Jian Wu and Roozbeh Jafari",
title = "Seamless Vision-assisted Placement Calibration for
Wearable Inertial Sensors",
journal = j-TECS,
volume = "16",
number = "3",
pages = "71:1--71:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3023364",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Wearable inertial devices are being widely used in the
applications of activity tracking, health care, and
professional sports, and their usage is on a rapid
rise. Signal processing algorithms for these devices
are often designed to work with a known location of the
wearable sensor on the body. However, in reality, the
wearable sensor may be worn at different body locations
due to the user's preference or unintentional
misplacement. The calibration of the sensor location is
important to ensure that the algorithms operate
correctly. In this article, we propose an
auto-calibration technique for determining the location
of wearables on the body by fusing the 3-axis
accelerometer data from the devices and
three-dimensional camera (i.e., Kinect) information
obtained from the environment. The automatic
calibration is achieved by a cascade
decision-tree-based classifier on top of the minimum
least-squares errors obtained by solving Wahba's
problem, operating on heterogeneous sensors. The core
contribution of our work is that there is no extra
burden on the user as a result of this technique. The
calibration is done seamlessly, leveraging sensor
fusion in an Internet-of-Things setting
opportunistically when the user is present in front of
an environmental camera performing arbitrary movements.
Our approach is evaluated with two different types of
movements: simple actions (e.g., sit-to-stand or
picking up phone) and complicated tasks (e.g., cooking
or playing basketball), yielding 100\% and 82.56\%
recall for simple actions and for complicated tasks,
respectively, in determining the correct location of
sensors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "71",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2017:GEA,
author = "Bo-Wei Chen and Wen Ji and Zhu Li",
title = "Guest Editorial for {ACM TECS} Special Issue on
Effective Divide-and-Conquer, Incremental, or
Distributed Mechanisms of Embedded Designs for
Extremely Big Data in Large-Scale Devices",
journal = j-TECS,
volume = "16",
number = "3",
pages = "72:1--72:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3068457",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "72",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:DMR,
author = "Anfeng Liu and Xiao Liu and Tianyi Wei and Laurence T.
Yang and Seungmin (Charlie) Rho and Anand Paul",
title = "Distributed Multi-Representative Re-Fusion Approach
for Heterogeneous Sensing Data Collection",
journal = j-TECS,
volume = "16",
number = "3",
pages = "73:1--73:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2974021",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A multi-representative re-fusion (MRRF) approximate
data collection approach is proposed in which multiple
nodes with similar readings form a data coverage set
(DCS). The reading value of the DCS is represented by
an R-node. The set near the Sink is smaller, while the
set far from the Sink is larger, which can reduce the
energy consumption in hotspot areas. Then, a
distributed data-aggregation strategy is proposed that
can re-fuse the value of R-nodes that are far from each
other but have similar readings. Both comprehensive
theoretical and experimental results indicate that the
MRRF approach increases lifetime and energy
efficiency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "73",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2017:LBD,
author = "Xiaogang Chen and Z. Jane Wang and Xiangyang Ji",
title = "A Load-Balancing Divide-and-Conquer {SVM} Solver",
journal = j-TECS,
volume = "16",
number = "3",
pages = "74:1--74:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3005347",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Scaling up kernel support vector machine (SVM)
training has been an important topic in recent years.
Despite its theoretical elegance, training kernel SVM
is impractical when facing millions of data. The
divide-and-conquer (DC) strategy is a natural framework
of handling gigantic problems, and the
divide-and-conquer solver for kernel SVM (DC-SVM) is
able to train kernel SVM with millions of data with
limited time cost. However, there are some drawbacks of
the DC-SVM approach. First, it used an unsupervised
clustering method to partition the whole problem, which
is prone to construct singular subsets, and, second, it
is hard to balance the computation load between
sub-problems. To address these issues, this article
proposed a load-balancing partition method for kernel
SVM. First, it clusters sample from one class and then
assigns data samples to the cluster centers by a
distance measure and construct sub-problems; in this
way, it is able to control the computation load and
avoid singular problems. Experimental results show that
the proposed method has better load-balancing
performance than DC-SVM, which implies that it is
suitable for distributed and embedding systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:PSSb,
author = "Bo Liu and Xiao-Tong Yuan and Yang Yu and Qingshan Liu
and Dimitris N. Metaxas",
title = "Parallel Sparse Subspace Clustering via Joint Sample
and Parameter Blockwise Partition",
journal = j-TECS,
volume = "16",
number = "3",
pages = "75:1--75:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063316",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Sparse subspace clustering (SSC) is a classical method
to cluster data with specific subspace structure for
each group. It has many desirable theoretical
properties and has been shown to be effective in
various applications. However, under the condition of a
large-scale dataset, learning the sparse sample
affinity graph is computationally expensive. To tackle
the computation time cost challenge, we develop a
memory-efficient parallel framework for computing SSC
via an alternating direction method of multiplier
(ADMM) algorithm. The proposed framework partitions the
data matrix into column blocks and then decomposes the
original problem into parallel multivariate Lasso
regression subproblems and samplewise operations. The
proposed method allows us to allocate multiple
cores/machines for the processing of individual column
blocks. We propose a stochastic optimization algorithm
to minimize the objective function. Experimental
results on real-world datasets demonstrate that the
proposed blockwise ADMM framework is substantially more
efficient than its matrix counterpart used by SSC,
without sacrificing performance in applications.
Moreover, our approach is directly applicable to
parallel neighborhood selection for Gaussian graphical
models structure estimation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "75",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kung:2017:CPD,
author = "Sun-Yuan Kung and Thee Chanyaswad and J. Morris Chang
and Peiyuan Wu",
title = "Collaborative {PCA\slash DCA} Learning Methods for
Compressive Privacy",
journal = j-TECS,
volume = "16",
number = "3",
pages = "76:1--76:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2996460",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In the Internet era, the data being collected on
consumers like us are growing exponentially, and
attacks on our privacy are becoming a real threat. To
better ensure our privacy, it is safer to let the data
owner control the data to be uploaded to the network as
opposed to taking chance with data servers or third
parties. To this end, we propose compressive privacy, a
privacy-preserving technique to enable the data creator
to compress data via collaborative learning so that the
compressed data uploaded onto the Internet will be
useful only for the intended utility and not be easily
diverted to malicious applications. For data in a
high-dimensional feature vector space, a common
approach to data compression is dimension reduction or,
equivalently, subspace projection. The most prominent
tool is principal component analysis (PCA). For
unsupervised learning, PCA can best recover the
original data given a specific reduced dimensionality.
However, for the supervised learning environment, it is
more effective to adopt a supervised PCA, known as
discriminant component analysis (DCA), to maximize the
discriminant capability. The DCA subspace analysis
embraces two different subspaces. The signal-subspace
components of DCA are associated with the discriminant
distance/power (related to the classification
effectiveness), whereas the noise subspace components
of DCA are tightly coupled with recoverability and/or
privacy protection. This article presents three
DCA-related data compression methods useful for
privacy-preserving applications: --- Utility-driven
DCA: Because the rank of the signal subspace is limited
by the number of classes, DCA can effectively support
classification using a relatively small dimensionality
(i.e., high compression). --- Desensitized PCA: By
incorporating a signal-subspace ridge into DCA, it
leads to a variant especially effective for extracting
privacy-preserving components. In this case, the
eigenvalues of the noise-space are made to become
insensitive to the privacy labels and are ordered
according to their corresponding component powers. ---
Desensitized K-means/SOM: Since the revelation of the
K-means or SOM cluster structure could leak sensitive
information, it is safer to perform K-means or SOM
clustering on a desensitized PCA subspace.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "76",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fleming:2017:CDI,
author = "Tom Fleming and Huang-Ming Huang and Alan Burns and
Chris Gill and Sanjoy Baruah and Chenyang Lu",
title = "Corrections to and Discussion of {``Implementation and
Evaluation of Mixed-criticality Scheduling Approaches
for Sporadic Tasks''}",
journal = j-TECS,
volume = "16",
number = "3",
pages = "77:1--77:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2974020",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
note = "See \cite{Huang:2014:IEM}.",
abstract = "The AMC-IA mixed-criticality scheduling analysis was
proposed as an improvement to the AMC-MAX adaptive
mixed-criticality scheduling analysis. However, we have
identified several necessary corrections to the AMC-IA
analysis. In this article, we motivate and describe
those corrections, and discuss and illustrate why the
corrected AMC-IA analysis cannot be shown to outperform
AMC-MAX.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "77",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bouraoui:2017:HAE,
author = "Hasna Bouraoui and Chadlia Jerad and Anupam
Chattopadhyay and Nejib {Ben Hadj-Alouane}",
title = "Hardware Architectures for Embedded Speaker
Recognition Applications: a Survey",
journal = j-TECS,
volume = "16",
number = "3",
pages = "78:1--78:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2975161",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Authentication technologies based on biometrics, such
as speaker recognition, are attracting more and more
interest thanks to the elevated level of security
offered by these technologies. Despite offering many
advantages, such as remote use and low vulnerability,
speaker recognition applications are constrained by the
heavy computational effort and the hard real-time
constraints. When such applications are run on an
embedded platform, the problem becomes more
challenging, as additional constraints inherent to this
specific domain are added. In the literature, different
hardware architectures were used/designed for
implementing a process with a focus on a given
particular metric. In this article, we give a survey of
the state-of-the-art works on implementations of
embedded speaker recognition applications. Our aim is
to provide an overview of the different approaches
dealing with acceleration techniques oriented towards
speaker and speech recognition applications and attempt
to identify the past, current, and future research
trends in the area. Indeed, on the one hand, many
flexible solutions were implemented, using either
General Purpose Processors or Digital Signal
Processors. In general, these types of solutions suffer
from low area and energy efficiency. On the other hand,
high-performance solutions were implemented on
Application Specific Integrated Circuits or Field
Programmable Gate Arrays but at the expense of
flexibility. Based on the available results, we compare
the application requirements vis-{\`a}-vis the
performance achieved by the systems. This leads to the
projection of new research trends that can be
undertaken in the future.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "78",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xu:2017:AFA,
author = "Ye Xu and Israel Koren and C. Mani Krishna",
title = "{AdaFT}: a Framework for Adaptive Fault Tolerance for
Cyber-Physical Systems",
journal = j-TECS,
volume = "16",
number = "3",
pages = "79:1--79:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2980763",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Cyber-physical systems (CPS) frequently have to use
massive redundancy to meet application requirements for
high reliability. While such redundancy is required, it
can be activated adaptively, based on the current state
of the controlled plant. Most of the time, the plant is
in a state that allows for a lower level of fault
tolerance. Avoiding the continuous deployment of
massive fault tolerance will greatly reduce the
workload of the CPS, and lower the operating
temperature of the cyber sub-system, thus increasing
its reliability. In this article, we extend our prior
research by demonstrating a software simulation
framework Adaptive Fault Tolerance (AdaFT) that can
automatically generate the sub-spaces within which our
adaptive fault tolerance can be applied. We also show
the theoretical benefits of AdaFT and its actual
implementation in several real-world CPSs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pagliari:2017:ABC,
author = "Daniele {Jahier Pagliari} and Mario R. Casu and Luca P.
Carloni",
title = "Accelerators for Breast Cancer Detection",
journal = j-TECS,
volume = "16",
number = "3",
pages = "80:1--80:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2983630",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Algorithms used in microwave imaging for breast cancer
detection require hardware acceleration to speed up
execution time and reduce power consumption. In this
article, we present the hardware implementation of two
accelerators for two alternative imaging algorithms
that we obtain entirely from SystemC specifications via
high-level synthesis. The two algorithms present
opposite characteristics that stress the design process
and the capabilities of commercial HLS tools in
different ways: the first is communication bound and
requires overlapping and pipelining of communication
and computation in order to maximize the application
throughput; the second is computation bound and uses
complex mathematical functions that HLS tools do not
directly support. Despite these difficulties, thanks to
HLS, in the span of only 4 months we were able to
explore a large design space and derive about 100
implementations with different cost-performance
profiles, targeting both a Field-Programmable Gate
Array (FPGA) platform and a 32-nm standard-cell
Application Specific Integrated Circuit (ASIC) library.
In addition, we could obtain results that outperform a
previous Register-Transfer Level (RTL) implementation,
which confirms the remarkable progress of HLS tools.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "80",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2017:SBT,
author = "Jiunn-Yeu Chen and Wuu Yang and Wei-Chung Hsu and
Bor-Yeh Shen and Quan-Huei Ou",
title = "On Static Binary Translation of {ARM\slash Thumb}
Mixed {ISA} Binaries",
journal = j-TECS,
volume = "16",
number = "3",
pages = "81:1--81:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2996458",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Code discovery has been a main challenge for static
binary translation, especially when the source
instruction set architecture has variable-length
instructions, such as the x86 architectures. Due to
embedded data such as PC (program counter)-relative
data, jump tables, or paddings in the code section, a
binary translator may be misled to translate data as
instructions. For variable-length instructions, once a
piece of data is mis-translated as instructions,
decoding subsequent bytes could also go wrong. We are
concerned with static binary translation for the very
popular Advanced RISC Machine (ARM) architectures.
Although ARM is considered a reduced instruction set
computer architecture, it does allow the mix of 32-bit
(ARM) instructions and 16-bit (Thumb) instructions in
the same executables. In addition to different
instruction lengths, the ARM and Thumb instructions are
located at 4-byte or 2-byte aligned addresses,
respectively. Furthermore, because ARM and Thumb
instructions share the same encoding space, a 4-byte
word could sometimes be decoded as one ARM instruction
or two Thumb instructions. The correct decoding of this
4-byte word is actually determined at runtime by the
least-significant bit of the program counter. For
unstripped binaries, the mapping symbols can be used to
identify ARM code regions and Thumb code regions.
However, for stripped binaries, such mapping symbols
are unavailable. We propose a novel solution to
statically translate stripped ARM/Thumb mixed
executables. Our solution is implemented in a static
binary translator. The binary translator further
generates multiple versions of translated code for the
code regions whose types cannot be determined with our
solution. One of the code versions is selected during
runtime. The binary translator also includes a series
of analyses that enable the removal of most useless
code versions. Based on the experimental results on
stripped ARM/Thumb mixed binaries in the SPEC2006 and
Embedded Microprocessor Benchmark Consortium (EEMBC)
benchmark suites, our static binary translator achieves
impressive performance when migrating them to run on
x86 machines and the space overhead is no more than
10\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "81",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tan:2017:ITM,
author = "Wilson M. Tan and Paul Sullivan and Hamish Watson and
Joanna Slota-Newson and Stephen A. Jarvis",
title = "An Indoor Test Methodology for Solar-Powered Wireless
Sensor Networks",
journal = j-TECS,
volume = "16",
number = "3",
pages = "82:1--82:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2994604",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Repeatable and accurate tests are important when
designing hardware and algorithms for solar-powered
wireless sensor networks (WSNs). Since no two days are
exactly alike with regard to energy harvesting, tests
must be carried out indoors. Solar simulators are
traditionally used in replicating the effects of
sunlight indoors; however, solar simulators are
expensive, have lighting elements that have short
lifetimes, and are usually not designed to carry out
the types of tests that hardware and algorithm
designers require. As a result, hardware and algorithm
designers use tests that are inaccurate and not
repeatable (both for others and also for the designers
themselves). In this article, we propose an indoor test
methodology that does not rely on solar simulators. The
test methodology has its basis in astronomy and
photovoltaic cell design. We present a generic design
for a test apparatus that can be used in carrying out
the test methodology. We also present a specific design
that we use in implementing an actual test apparatus.
We test the efficacy of our test apparatus and, to
demonstrate the usefulness of the test methodology,
perform experiments akin to those required in projects
involving solar-powered WSNs. Results of the said tests
and experiments demonstrate that the test methodology
is an invaluable tool for hardware and algorithm
designers working with solar-powered WSNs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "82",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2017:SUE,
author = "Tseng-Yi Chen and Yuan-Hao Chang and Shuo-Han Chen and
Nien-I Hsu and Hsin-Wen Wei and Wei-Kuan Shih",
title = "On Space Utilization Enhancement of File Systems for
Embedded Storage Systems",
journal = j-TECS,
volume = "16",
number = "3",
pages = "83:1--83:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2820488",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Since the mid-2000s, mobile/embedded computing systems
conventionally have limited computing power, Random
Access Memory (RAM) space, and storage capacity due to
the consideration of their cost, energy consumption,
and physical size. Recently, some of these systems,
such as mobile phone and embedded consumer electronics,
have more powerful computing capability, so they manage
their data in small flash storage devices (e.g.,
Embedded Multi Media Card (eMMC) and Secure Digital
(SD) cards) with a simple file system. However, the
existing file systems usually have low space
utilization for managing small files and the tail data
of large files. In this work, we thus propose a dynamic
tail packing scheme to enhance the space utilization of
file systems over flash storage devices in embedded
computing systems by dynamically aggregating/packing
the tail data of (small) files together. To evaluate
the benefits and overheads of the proposed scheme, we
theoretically formulate analysis equations for
obtaining the best settings in the dynamic tail packing
scheme. Additionally, the proposed scheme was
implemented in the file system of Linux operating
systems to evaluate its capability. The results
demonstrate that the proposed scheme could
significantly improve the space utilization of existing
file systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "83",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Landy:2017:SAS,
author = "Aaron Landy and Greg Stitt",
title = "Serial Arithmetic Strategies for Improving {FPGA}
Throughput",
journal = j-TECS,
volume = "16",
number = "3",
pages = "84:1--84:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2996459",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Serial arithmetic has been shown to offer attractive
advantages in area for field-programmable gate array
(FPGA) datapaths but suffers from a significant
reduction in throughput compared to traditional
bit-parallel designs. In this work, we perform a
performance and trade-off analysis that
counterintuitively shows that, despite the decreased
throughput of individual serial operators, replication
of serial arithmetic can provide a 2.1 $ \times $
average increase in throughput compared to bit-parallel
pipelines for common FPGA applications. We complement
this analysis with a novel SerDes architecture that
enables existing FPGA pipelines to be replaced with
serial logic with potentially higher throughput. We
also present a serialized sliding-window architecture
that improves average throughput 2.4 $ \times $
compared to existing bit-parallel work.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "84",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Alur:2017:SBR,
author = "Rajeev Alur and Vojt{\v{e}}ch Forejt and Salar Moarref and
Ashutosh Trivedi",
title = "Schedulability of Bounded-Rate Multimode Systems",
journal = j-TECS,
volume = "16",
number = "3",
pages = "85:1--85:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2996797",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Bounded-rate multimode systems are hybrid systems that
switch freely among a finite set of modes, and whose
dynamics are specified by a finite number of
real-valued variables with mode-dependent rates that
vary within given bounded sets. The scheduler
repeatedly proposes a time and a mode, while the
environment chooses an allowable rate for that mode;
the state of the system changes linearly in the
direction of the rate. The scheduler aims to keep the
state within a safe set, while the environment aims to
leave it. We study the problem of existence of a
winning scheduler strategy and associated complexity
questions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "85",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bandari:2017:DBE,
author = "Maryam Bandari and Robert Simon and Hakan Aydin",
title = "{DMS}-Based Energy Optimizations for Clustered
{WSNs}",
journal = j-TECS,
volume = "16",
number = "3",
pages = "86:1--86:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2998179",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we consider clustered wireless sensor
networks where the nodes harvest energy from the
environment. We target performance-sensitive
applications that have to collectively send their
information to a cluster head by a predefined deadline.
The nodes are equipped with Dynamic Modulation Scaling
(DMS)-capable wireless radios. DMS provides a tuning
knob, allowing us to trade off communication latency
with energy consumption. We consider two optimization
objectives, maximizing total energy reserves and
maximizing the minimum energy level across all nodes.
For both objectives, we show that optimal solutions can
be obtained by solving Mixed Integer Linear Programming
problems. We also develop several fast heuristics that
are shown to provide approximate solutions
experimentally.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Devaraj:2017:FTP,
author = "Rajesh Devaraj and Arnab Sarkar and Santosh Biswas",
title = "Fault-Tolerant Preemptive Aperiodic {RT} Scheduling by
Supervisory Control of {TDES} on Multiprocessors",
journal = j-TECS,
volume = "16",
number = "3",
pages = "87:1--87:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3012278",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Safety-critical real-time systems must meet stringent
timing and fault-tolerance requirements. This article
proposes a methodology for synthesizing an optimal
preemptive multiprocessor aperiodic task scheduler
using a formal supervisory control framework. The
scheduler can tolerate single/multiple permanent
processor faults. Further, the synthesis framework has
been empowered with a novel BDD-based symbolic
computation mechanism to control the exponential
state-space complexity of the optimal exhaustive
enumeration-oriented synthesis methodology.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lu:2017:CDS,
author = "Qining Lu and Guanpeng Li and Karthik Pattabiraman and
Meeta S. Gupta and Jude A. Rivers",
title = "Configurable Detection of {SDC}-causing Errors in
Programs",
journal = j-TECS,
volume = "16",
number = "3",
pages = "88:1--88:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3014586",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Silent Data Corruption (SDC) is a serious reliability
issue in many domains, including embedded systems.
However, current protection techniques are brittle and
do not allow programmers to trade off performance for
SDC coverage. Further, many require tens of thousands
of fault-injection experiments, which are highly time-
and resource-intensive. In this article, we propose two
empirical models, SDCTune and SDCAuto, to predict the
SDC proneness of a program's data. Both models are
based on static and dynamic features of the program
alone and do not require fault injections to be
performed. The main difference between them is that
SDCTune requires manual tuning while SDCAuto is
completely automated, using machine-learning
algorithms. We then develop an algorithm using both
models to selectively protect the most SDC-prone data
in the program subject to a given performance overhead
bound. Our results show that both models are accurate
at predicting the relative SDC rate of an application
compared to fault injection, for a fraction of the time
taken. Further, in terms of efficiency of detection
(i.e., ratio of SDC coverage provided to performance
overhead), our technique outperforms full duplication
by a factor of 0.78x to 1.65x with the SDCTune model
and 0.62x to 0.96x with SDCAuto model.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2017:FBA,
author = "Guoxian Huang and Lei Wang",
title = "An {FPGA}-Based Architecture for High-Speed Compressed
Signal Reconstruction",
journal = j-TECS,
volume = "16",
number = "3",
pages = "89:1--89:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3056481",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Compressive Sensing (CS) is an emerging research area
that allows efficient signal acquisition under the
sub-Nyquist rate while still promising reliable data
recovery. However, practical applications of CS in
hardware platforms are limited as signal reconstruction
is still challenging due to its high computational
complexity, especially for autonomous real-time signal
recovery. In this article, we propose an algorithmic
transformation technique referred to as Matrix
Inversion Bypass (MIB) to improve the signal recovery
efficiency of the Orthogonal Matching Pursuit
(OMP)-based CS reconstruction. The basic idea of MIB is
to decouple the computations of intermediate signal
estimates and matrix inversions, thereby enabling
parallel processing of these two time-consuming
operations in the OMP algorithm. The proposed MIB
naturally leads to a parallel architecture for
high-speed dedicated hardware implementations. An
FPGA-based implementation is developed with the
optimized structure aimed at the efficient utilization
of hardware resources while realizing high-speed signal
recovery. The proposed architecture can perform the
signal recovery at up to 1.4 $ \times $ faster than the
OMP-based implementation using almost the same hardware
resources.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "89",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Groza:2017:LCL,
author = "Bogdan Groza and Stefan Murvay and Anthony {Van
Herrewege} and Ingrid Verbauwhede",
title = "{LiBrA--CAN}: Lightweight Broadcast Authentication for
Controller Area Networks",
journal = j-TECS,
volume = "16",
number = "3",
pages = "90:1--90:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3056506",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 24 09:51:12 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Despite realistic concerns, security is still absent
from vehicular buses such as the widely used Controller
Area Network (CAN). We design an efficient protocol
based on efficient symmetric primitives, taking
advantage of two innovative procedures: splitting keys
between nodes and mixing authentication tags. This
results in a higher security level when compromised
nodes are in the minority, a realistic assumption for
automotive networks. Experiments are performed on
state-of-the-art Infineon TriCore controllers,
contrasted with low-end Freescale S12X cores, while
simulations are provided for the recently released
CAN-FD standard. To gain compatibility with existent
networks, we also discuss a solution based on CAN+.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "90",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2017:ESM,
author = "Sandeep K. Shukla",
title = "Editorial: Security of Mobile Devices",
journal = j-TECS,
volume = "16",
number = "4",
pages = "91:1--91:??",
month = sep,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3129534",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Dec 9 08:24:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "91",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mathew:2017:GES,
author = "Jimson Mathew and Rajat Subhra Chakraborty and Dhiraj
K. Pradhan",
title = "Guest Editorial: Special Issue on {``Secure and
Fault-Tolerant Embedded Computing''}",
journal = j-TECS,
volume = "16",
number = "4",
pages = "92:1--92:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3075563",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "92",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ko:2017:PCS,
author = "Yohan Ko and Reiley Jeyapaul and Youngbin Kim and
Kyoungwoo Lee and Aviral Shrivastava",
title = "Protecting Caches from Soft Errors: a Microarchitect's
Perspective",
journal = j-TECS,
volume = "16",
number = "4",
pages = "93:1--93:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063180",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Soft error is one of the most important design
concerns in modern embedded systems with aggressive
technology scaling. Among various microarchitectural
components in a processor, cache is the most
susceptible component to soft errors. Error detection
and correction codes are common protection techniques
for cache memory due to their design simplicity. In
order to design effective protection techniques for
caches, it is important to quantitatively estimate the
susceptibility of caches without and even with
protections. At the architectural level, vulnerability
is the metric to quantify the susceptibility of data in
caches. However, existing tools and techniques
calculate the vulnerability of data in caches through
coarse-grained block-level estimation. Further, they
ignore common cache protection techniques such as error
detection and correction codes. In this article, we
demonstrate that our word-level vulnerability
estimation is accurate through intensive fault
injection campaigns as compared to block-level one.
Further, our extensive experiments over benchmark
suites reveal several counter-intuitive and interesting
results. Parity checking when performed over just reads
provides reliable and power-efficient protection than
that when performed over both reads and writes. On the
other hand, checking error correcting codes only at
reads alone can be vulnerable even for single-bit soft
errors, while that at both reads and writes provides
the perfect reliability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "93",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Esposito:2017:NMO,
author = "Stefano Esposito and Massimo Violante and Marco Sozzi
and Marco Terrone and Massimo Traversone",
title = "A Novel Method for Online Detection of Faults
Affecting Execution-Time in Multicore-Based Systems",
journal = j-TECS,
volume = "16",
number = "4",
pages = "94:1--94:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063313",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article proposes a bounded interference method,
based on statistical evaluations, for online detection
and tolerance of any fault capable of causing a
deadline miss. The proposed method requires data that
can be gathered during the profiling and worst-case
execution time (WCET) analysis phase. This article
describes the method, its application, and then it
presents an avionic mixed-criticality use case for
experimental evaluation, considering both dual-core and
quad-core platforms. Results show that faults that can
cause a timing violation are correctly identified while
other faults that do not introduce a significant
temporal interference can be tolerated to avoid high
recovery overheads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "94",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yuce:2017:AFI,
author = "Bilgiday Yuce and Nahid Farhady Ghalaty and Chinmay
Deshpande and Harika Santapuri and Conor Patrick and
Leyla Nazhandali and Patrick Schaumont",
title = "Analyzing the Fault Injection Sensitivity of Secure
Embedded Software",
journal = j-TECS,
volume = "16",
number = "4",
pages = "95:1--95:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063311",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Fault attacks on cryptographic software use faulty
ciphertext to reverse engineer the secret encryption
key. Although modern fault analysis algorithms are
quite efficient, their practical implementation is
complicated because of the uncertainty that comes with
the fault injection process. First, the intended fault
effect may not match the actual fault obtained after
fault injection. Second, the logic target of the fault
attack, the cryptographic software, is above the
abstraction level of physical faults. The resulting
uncertainty with respect to the fault effects in the
software may degrade the efficiency of the fault
attack, resulting in many more trial fault injections
than the amount predicted by the theoretical fault
attack. In this contribution, we highlight the
important role played by the processor
microarchitecture in the development of a fault attack.
We introduce the microprocessor fault sensitivity model
to systematically capture the fault response of a
microprocessor pipeline. We also propose
Microarchitecture-Aware Fault Injection Attack (MAFIA).
MAFIA uses the fault sensitivity model to guide the
fault injection and to predict the fault response. We
describe two applications for MAFIA. First, we
demonstrate a biased fault attack on an unprotected
Advanced Encryption Standard (AES) software program
executing on a seven-stage pipelined Reduced
Instruction Set Computer (RISC) processor. The use of
the microprocessor fault sensitivity model to guide the
attack leads to an order of magnitude fewer fault
injections compared to a traditional, blind fault
injection method. Second, MAFIA can be used to break
known software countermeasures against fault injection.
We demonstrate this by systematically breaking a
collection of state-of-the-art software fault
countermeasures. These two examples lead to the key
conclusion of this work, namely that software fault
attacks become much more harmful and effective when an
appropriate microprocessor fault sensitivity model is
used. This, in turn, highlights the need for better
fault countermeasures for software.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "95",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mera:2017:ATP,
author = "Maria Isabel Mera and Jonah Caplan and Seyyed Hasan
Mozafari and Brett H. Meyer and Peter Milder",
title = "Area, Throughput, and Power Trade-Offs for {FPGA}- and
{ASIC}-Based Execution Stream Compression",
journal = j-TECS,
volume = "16",
number = "4",
pages = "96:1--96:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063312",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "An emerging trend in safety-critical computer system
design is the use of compression --- for example, using
cyclic redundancy check (CRC) or Fletcher checksum (FC)
--- to reduce the state that must be compared to verify
correct redundant execution. We examine the costs and
performance of CRC and FC as compression algorithms
when implemented in hardware for embedded
safety-critical systems. To do so, we have developed
parameterizable hardware-generation tools targeting CRC
and two novel FC implementations. We evaluate the
resulting designs implemented for FPGA and ASIC and
analyze their efficiency. While CRC is often best, FC
dominates when high throughput is needed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "96",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tigori:2017:FMB,
author = "Kabland Toussaint Gautier Tigori and Jean-Luc
B{\'e}chennec and S{\'e}bastien Faucou and Olivier
Henri Roux",
title = "Formal Model-Based Synthesis of Application-Specific
Static {RTOS}",
journal = j-TECS,
volume = "16",
number = "4",
pages = "97:1--97:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3015777",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In an embedded system, the specialization of the code
of the real-time operating system (RTOS) according to
the requirements of the application allows one to
remove unused services and other sources of dead code
from the binary program. The typical specialization
process is based on a mix of precompiler macros and
build scripts, both of which are known for being
sources of errors. In this article, we present a new
model-based approach to the design of
application-specific RTOS. Starting with finite state
models describing the RTOS and the application
requirements, the set of blocks in the RTOS code
actually used by the application is automatically
computed. This set is used to build an
application-specific RTOS model. This model is fed into
a code generator to produce the source code of an
application-specific RTOS. It is also used to carry on
model-based validations and verifications, including
the formal verification that the specialization process
did not introduce unwanted behaviors or suppress
expected ones. To demonstrate the feasibility of this
approach, it is applied to specialize Trampoline, an
open-source implementation of the AUTOSAR OS standard,
to an industrial case study from the automotive
domain.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "97",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Santanna:2017:DIS,
author = "Francisco Sant'anna and Roberto Ierusalimschy and
Noemi Rodriguez and Silvana Rossetto and Adriano
Branco",
title = "The Design and Implementation of the Synchronous
Language {C{\'e}U}",
journal = j-TECS,
volume = "16",
number = "4",
pages = "98:1--98:26",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3035544",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "C{\'e}U is a synchronous language targeting soft
real-time systems. It is inspired by Esterel and has a
simple semantics with fine-grain control over program
execution. C{\'e}U uses an event-triggered notion of
time that enables compile-time checks to detect
conflicting concurrent statements, resulting in
deterministic and concurrency-safe programs. We present
the particularities of our design in comparison to
Esterel, such as stack-based internal events,
concurrency checks, safe integration with C, and
first-class timers. We also present two implementation
back ends: one aiming for resource efficiency and
interoperability with C, and another as a virtual
machine that allows remote reprogramming.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "98",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Stilkerich:2017:PGU,
author = "Isabella Stilkerich and Clemens Lang and Christoph
Erhardt and Christian Bay and Michael Stilkerich",
title = "The Perfect Getaway: Using Escape Analysis in Embedded
Real-Time Systems",
journal = j-TECS,
volume = "16",
number = "4",
pages = "99:1--99:30",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3035542",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The use of a managed, type-safe language such as Java
in real-time and embedded systems offers productivity
and, in particular, safety and dependability benefits
at a reasonable cost. It has been shown for commodity
systems that Escape Analysis (EA) enables a set of
useful optimizations, and benefits from the properties
of a type-safe language. In this article, we explore
the application of escape analysis in KESO [Stilkerich
et al. 2012], a Java ahead-of-time compiler targeting
embedded real-time systems. We present specific
applications of EA for embedded programs that go beyond
the widely known stack-allocation and synchronization
optimizations such as extended remote-procedure-call
(RPC) support for software-isolated applications,
automated inference of immutable data, or improved
upper space and time bounds for worst-case
estimations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "99",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hassan:2017:PRA,
author = "Mohamed Hassan and Hiren Patel and Rodolfo
Pellizzoni",
title = "{PMC}: a Requirement-Aware {DRAM} Controller for
Multicore Mixed Criticality Systems",
journal = j-TECS,
volume = "16",
number = "4",
pages = "100:1--100:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3019611",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We propose a novel approach to schedule memory
requests in Mixed Criticality Systems (MCS). This
approach supports an arbitrary number of criticality
levels by enabling the MCS designer to specify memory
requirements per task. It retains locality within
large-size requests to satisfy memory requirements of
all tasks. To achieve this target, we introduce a
compact time-division-multiplexing scheduler, and a
framework that constructs optimal schedules to manage
requests to off-chip memory. We also present a static
analysis that guarantees meeting requirements of all
tasks. We compare the proposed controller against
state-of-the-art memory controllers using both a case
study and synthetic experiments.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "100",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2017:HAT,
author = "Tianyi Wang and Soamar Homsi and Linwei Niu and
Shaolei Ren and Ou Bai and Gang Quan and Meikang Qiu",
title = "Harmonicity-Aware Task Partitioning for Fixed Priority
Scheduling of Probabilistic Real-Time Tasks on
Multi-Core Platforms",
journal = j-TECS,
volume = "16",
number = "4",
pages = "101:1--101:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3064813",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The uncertainty due to performance variations of IC
chips and resource sharing on multi-core platforms have
significantly degraded the predictability of real-time
systems. Traditional deterministic approaches based on
the worst-case assumptions become extremely pessimistic
and thus unpractical. In this article, we address the
problem of scheduling a set of fixed-priority periodic
real-time tasks on multi-core platforms in a
probabilistic manner. Specifically, we consider task
execution time as a probabilistic distribution and
study how to schedule these tasks on multi-core
platforms with guaranteed Quality of Service (QoS)
requirements in terms of deadline-missing
probabilities. Moreover, it is a well-known fact that
the relationship among task periods, if exploited
appropriately, can significantly improve the processor
utilization. To this end, we present a novel approach
to partition real-time tasks that can take both task
execution time distributions and their period
relationships into consideration. From our extensive
experiment results, our proposed methods can greatly
improve the schedulability of real-time tasks when
compared with existing approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "101",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2017:DRM,
author = "Yi Wang and Yajun Ha",
title = "A {DFA}-Resistant and Masked {PRESENT} with Area
Optimization for {RFID} Applications",
journal = j-TECS,
volume = "16",
number = "4",
pages = "102:1--102:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3035543",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Radio-Frequency Identification (RFID) tag-based
applications are usually resource constrained and
security sensitive. However, only about 2,000 gate
equivalents in a tag can be budgeted for implementing
security components [27]. This requires not only
lightweight cryptographic algorithms such as PRESENT
(around 1,000 gate equivalents) but also lightweight
protections against modern Side Channel Attacks (SCAs).
With this budget, the first-order masking and fault
detection are two suitable countermeasures to be
developed for PRESENT. However, if both countermeasures
are applied without any optimization, it will
significantly exceed the given area budget. In this
work, we optimize area to include both countermeasures
to maximize the security for PRESENT within this RFID
area budget. The most area-consuming parts of the
proposed design are the masked S-boxes and the inverse
masked S-boxes. To optimize the area, we have deduced a
computational relationship between these two parts,
which enables us to reuse the hardware resource of the
masked S-boxes to implement the inverse masked S-boxes.
The proposed design takes up only 2,376 gates with UMC
65nm CMOS technology. Compared with the unoptimized
design, our implementation reduces the overall area by
28.45\%. We have tested the effectiveness of the
first-order Differential Power Analysis (DPA) and
Differential Fault Analysis (DFA)-resistant
countermeasures. Experimental results show that we have
enhanced the SCA resistance of our PRESENT
implementation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "102",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nagar:2017:RCB,
author = "Kartik Nagar and Y. N. Srikant",
title = "Refining Cache Behavior Prediction Using Cache Miss
Paths",
journal = j-TECS,
volume = "16",
number = "4",
pages = "103:1--103:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3035541",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Worst-Case Execution Time (WCET) is an important
metric for programs running on real-time systems, and
finding precise estimates of a program's WCET is
crucial to avoid wastage of hardware resources and to
improve the schedulability of task sets. Caches have a
major impact on a program's execution time, and
accurate estimation of a program's cache behavior can
lead to significant reduction in its estimated WCET.
The traditional approach to cache analysis generally
targets the worst-case cache behavior of individual
cache accesses and provides a safe hit-miss
classification for every individual access. In this
work, we show that these classifications are not
sufficient to precisely capture cache behavior, since
they apply to individual accesses, and often, more
precise predictions can be made about groups of
accesses. Further, memory accesses inside loops may
show the worst-case behavior only for a subset of the
iteration space. In order to predict such behavior in a
scalable fashion, we use the fact that the cache
behavior of an access mostly depends only on the memory
accesses made in the immediate vicinity, and hence we
analyze a small, fixed-size neighborhood of every
access with complete precision and summarize the
resulting information in the form of cache miss paths.
A variety of analyses are then performed on the cache
miss paths to make precise predictions about cache
behavior. We also demonstrate precision issues in
Abstract Interpretation-based Must and Persistence
cache analysis that can be easily solved using cache
miss paths. Experimental results over a wide range of
benchmarks demonstrate precision improvement in WCET of
multipath programs over previous approaches, and we
also show how to integrate our approach with other
microarchitectural analysis such as pipeline
analysis.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "103",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Benerecetti:2017:ASS,
author = "Massimo Benerecetti and Marco Faella",
title = "Automatic Synthesis of Switching Controllers for
Linear Hybrid Systems: Reachability Control",
journal = j-TECS,
volume = "16",
number = "4",
pages = "104:1--104:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047500",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We consider the problem of computing the controllable
region of a Linear Hybrid Automaton with controllable
and uncontrollable transitions, w.r.t. a reachability
objective. We provide an algorithm for the
finite-horizon version of the problem, based on
computing the set of states that must reach a given
non-convex polyhedron while avoiding another one,
subject to a polyhedral constraint on the slope of the
trajectory. Experimental results are presented, based
on an implementation of the proposed algorithm on top
of the tool SpaceEx.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "104",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sandoval:2017:TTS,
author = "Nathan Sandoval and Casey Mackin and Sean Whitsitt and
Vijay Shankar Gopinath and Sachidanand Mahadevan and
Andrew Milakovich and Kyle Merry and Jonathan Sprinkle
and Roman Lysecky",
title = "Task Transition Scheduling for Data-Adaptable
Systems",
journal = j-TECS,
volume = "16",
number = "4",
pages = "105:1--105:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047498",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Data-adaptable embedded systems operate on a variety
of data streams, which requires a large degree of
configurability and adaptability to support runtime
changes in data stream inputs. Data-adaptable
reconfigurable embedded systems, when decomposed into a
series of tasks, enable a flexible runtime
implementation in which a system can transition the
execution of certain tasks between hardware and
software while simultaneously continuing to process
data during the transition. Efficient runtime
scheduling of task transitions is needed to optimize
system throughput and latency of the reconfiguration
and transition periods. In this article, we provide an
overview of a runtime framework enabling the efficient
transition of tasks between software and hardware in
response to changes in system inputs. We further
present and analyze several runtime transition
scheduling algorithms and highlight the latency and
throughput tradeoffs for two data-adaptable systems. To
evaluate the task transition selection algorithms, a
case study was performed on an adaptable JPEG2000
implementation as well as three other synchronous
dataflow systems characterized by transition latency
and communication load.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "105",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zheng:2017:RTS,
author = "Xi Zheng and Christine Julien and Hongxu Chen and
Rodion Podorozhny and Franck Cassez",
title = "Real-Time Simulation Support for Runtime Verification
of Cyber-Physical Systems",
journal = j-TECS,
volume = "16",
number = "4",
pages = "106:1--106:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063382",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In Cyber-Physical Systems (CPS), cyber and physical
components must work seamlessly in tandem. Runtime
verification of CPS is essential yet very difficult,
due to deployment environments that are expensive,
dangerous, or simply impossible to use for verification
tasks. A key enabling factor of runtime verification of
CPS is the ability to integrate real-time simulations
of portions of the CPS into live running systems. We
propose a verification approach that allows CPS
application developers to opportunistically leverage
real-time simulation to support runtime verification.
Our approach, termed BraceBind, allows selecting, at
runtime, between actual physical processes or
simulations of them to support a running CPS
application. To build BraceBind, we create a real-time
simulation architecture to generate and manage multiple
real-time simulation environments based on existing
simulation models in a manner that ensures sufficient
accuracy for verifying a CPS application. Specifically,
BraceBind aims to both improve simulation speed and
minimize latency, thereby making it feasible to
integrate simulations of physical processes into the
running CPS application. BraceBind then integrates this
real-time simulation architecture with an existing
runtime verification approach that has low
computational overhead and high accuracy. This
integration uses an aspect-oriented adapter
architecture that connects the variables in the cyber
portion of the CPS application with either sensors and
actuators in the physical world or the automatically
generated real-time simulation. Our experimental
results show that, with a negligible performance
penalty, our approach is both efficient and effective
in detecting program errors that are otherwise only
detectable in a physical deployment.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "106",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ma:2017:DPE,
author = "Kaisheng Ma and Xueqing Li and Huichu Liu and Xiao
Sheng and Yiqun Wang and Karthik Swaminathan and
Yongpan Liu and Yuan Xie and John Sampson and
Vijaykrishnan Narayanan",
title = "Dynamic Power and Energy Management for Energy
Harvesting Nonvolatile Processor Systems",
journal = j-TECS,
volume = "16",
number = "4",
pages = "107:1--107:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3077575",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Self-powered systems running on scavenged energy will
be a key enabler for pervasive computing across the
Internet of Things. The variability of input power in
energy-harvesting systems limits the effectiveness of
static optimizations aimed at maximizing the
input-energy-to-computation ratio. We show that the
resultant gap between available and exploitable energy
is significant, and that energy storage optimizations
alone do not significantly close the gap. We
characterize these effects on a real, fabricated
energy-harvesting system based on a nonvolatile
processor. We introduce a unified energy-oriented
approach to first optimize the number of backups, by
more aggressively using the stored energy available
when power failure occurs, and then optimize forward
progress via improving the rate of input energy to
computation via dynamic voltage and frequency scaling
and self-learning techniques. We evaluate combining
these schemes and show capture of up to 75.5\% of all
input energy toward processor computation, an average
of $ 1.54 \times $ increase over the best static
``Forward Progress'' baseline system. Notably, our
energy-optimizing policy combinations simultaneously
improve both the rate of forward progress and the rate
of backup events (by up to 60.7\% and 79.2\% for RF
power, respectively, and up to 231.2\% and reduced to
zero, respectively, for solar power). This contrasts
with static frequency optimization approaches in which
these two metrics are antagonistic.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "107",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chatterjee:2017:FTD,
author = "Navonil Chatterjee and Suraj Paul and Santanu
Chattopadhyay",
title = "Fault-Tolerant Dynamic Task Mapping and Scheduling for
Network-on-Chip-Based Multicore Platform",
journal = j-TECS,
volume = "16",
number = "4",
pages = "108:1--108:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3055512",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In Network-on-Chip (NoC)-based multicore systems, task
allocation and scheduling are known to be important
problems, as they affect the performance of
applications in terms of energy consumption and timing.
Advancement of deep submicron technology has made it
possible to scale the transistor feature size to the
nanometer range, which has enabled multiple processing
elements to be integrated onto a single chip. On the
flipside, it has made the integrated entities on the
chip more susceptible to different faults. Although a
significant amount of work has been done in the domain
of fault-tolerant mapping and scheduling, existing
algorithms either precompute reconfigured mapping
solutions at design time while anticipating fault(s)
scenarios or adopt a hybrid approach wherein a part of
the fault mitigation strategy relies on the design-time
solution. The complexity of the problem rises further
for real-time dynamic systems where new applications
can arrive in the multicore platform at any time
instant. For real-time systems, the validity of
computation depends both on the correctness of results
and on temporal constraint satisfaction. This article
presents an improved fault-tolerant dynamic solution to
the integrated problem of application mapping and
scheduling for NoC-based multicore platforms. The
developed algorithm provides a unified mapping and
scheduling method for real-time systems focusing on
meeting application deadlines and minimizing
communication energy. A predictive model has been used
to determine the failure-prone cores in the system for
which a fault-tolerant resource allocation with task
redundancy has been performed. By selectively using a
task replication policy, the reliability of the
application, executing on a given NoC platform, is
improved. A detailed evaluation of the performance of
the proposed algorithm has been conducted for both real
and synthetic applications. When compared with other
fault-tolerant algorithms reported in the literature,
performance of the proposed algorithm shows an average
reduction of 56.95\% in task re-execution time overhead
and an average improvement of 31\% in communication
energy. Further, for time-constrained tasks, deadline
satisfaction has also been achieved for most of the
test cases by the developed algorithm, whereas the
techniques reported in the literature failed to meet
deadline in about 45\% test cases.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "108",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ahir:2017:LAR,
author = "Prashant Ahir and Mehran Mozaffari-Kermani and Reza
Azarderakhsh",
title = "Lightweight Architectures for Reliable and Fault
Detection {Simon} and {Speck} Cryptographic Algorithms
on {FPGA}",
journal = j-TECS,
volume = "16",
number = "4",
pages = "109:1--109:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3055514",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The widespread use of sensitive and constrained
applications necessitates lightweight (low-power and
low-area) algorithms developed for constrained
nano-devices. However, nearly all of such algorithms
are optimized for platform-based performance and may
not be useful for diverse and flexible applications.
The National Security Agency (NSA) has proposed two
relatively recent families of lightweight ciphers, that
is, Simon and Speck, designed as efficient ciphers on
both hardware and software platforms. This article
proposes concurrent error detection schemes to provide
reliable architectures for these two families of
lightweight block ciphers. The research work on
analyzing the reliability of these algorithms and
providing fault diagnosis approaches has not been
undertaken to date to the best of our knowledge. The
main aim of the proposed reliable architectures is to
provide high error coverage while maintaining
acceptable area and power consumption overheads. To
achieve this, we propose a variant of recomputing with
encoded operands. These low-complexity schemes are
suited for low-resource applications such as sensitive,
constrained implantable and wearable medical devices.
We perform fault simulations for the proposed
architectures by developing a fault model framework.
The architectures are simulated and analyzed on recent
field-programmable gate array (FPGA) platforms, and it
is shown that the proposed schemes provide high error
coverage. The proposed low-complexity concurrent error
detection schemes are a step forward toward more
reliable architectures for Simon and Speck algorithms
in lightweight, secure applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "109",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pan:2017:EMW,
author = "Chen Pan and Mimi Xie and Chengmo Yang and Yiran Chen
and Jingtong Hu",
title = "Exploiting Multiple Write Modes of Nonvolatile Main
Memory in Embedded Systems",
journal = j-TECS,
volume = "16",
number = "4",
pages = "110:1--110:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063130",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Existing Nonvolatile Memories (NVMs) have many
attractive features to be the main memory of embedded
systems. These features include low power, high
density, and better scalability. Recently, Multilevel
Cell (MLC) NVM has gained more and more popularity as
it can provide a higher density than the traditional
Single-Level Cell (SLC) NVM. However, there are also
drawbacks in MLC NVM, namely, limited write endurance
and expensive write operation. These two drawbacks have
to be overcome before MLC NVM can be practically
adopted as the main memory. In MLC Nonvolatile Main
Memory (NVMM), two different types of write operations
with very diverse data retention times are allowed. The
first type maintains data for years but takes a longer
time to write and is detrimental to the endurance. The
second type maintains data for a short period but takes
a shorter time to write. By observing that much of the
data written to main memory is temporary and does not
need to last long during the execution of a program, in
this article, we propose novel task scheduling and
write operation selection algorithms to improve MLC
NVMM endurance and program efficiency. An Integer
Linear Programming (ILP) formulation is first proposed
to obtain optimal results. Since ILP takes exponential
time to solve, we also propose the Multiwrite
Mode-Aware Scheduling (MMAS) algorithm to achieve a
near-optimal solution in polynomial time. Additionally,
the Dynamical Memory Block Screening (DMS) algorithm is
proposed to achieve wear leveling. The experimental
results demonstrate that the proposed techniques can
greatly improve the lifetime of the MLC NVMM as well as
the efficiency of the program.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "110",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2017:TPR,
author = "Yu Li and Albert M. K. Cheng",
title = "Toward a Practical Regularity-based Model: The Impact
of Evenly Distributed Temporal Resource Partitions",
journal = j-TECS,
volume = "16",
number = "4",
pages = "111:1--111:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092945",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Most Hierarchical Real-time Scheduling (HiRTS)
techniques have focused on temporal resource partitions
in which time units are periodically distributed.
Although such periodic partitions could provide great
flexibility for the resource-level scheduling,
engineers face significant obstacles when trying to
determine the schedulability of real-time tasks running
on them. The main reason is that periodic partitions
fail to effectively bound the difference between the
ideal and the actual resource allocation. To solve this
problem, some researchers introduced the Regular
Partition, a type of temporal resource partition that
is almost evenly distributed. Recent research has shown
that it achieves maximal transparency for task
scheduling---some classical real-time scheduling problems
on a regular partition can be easily transformed into
equivalent problems on a dedicated single resource.
However, the resource partitioning problem for regular
partitions is much more complicated than the one for
periodic partitions. Based on a practical two-layer
HiRTS platform, this article introduces MulZ (Multiple
Z-sequences), which is the first to solve this problem
with a partitioned scheduling strategy. By using a more
complicated approximation methodology, our experimental
results show that MulZ outperforms the current best
global scheduling algorithm on this problem. After
that, it compares the overall performance of the
periodic partition and the regular partition. We
conclude that the regular partition is a better choice
for the integration of real-time applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "111",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2017:WAF,
author = "Yooseong Kim and David Broman and Aviral Shrivastava",
title = "{WCET}-Aware Function-Level Dynamic Code Management on
Scratchpad Memory",
journal = j-TECS,
volume = "16",
number = "4",
pages = "112:1--112:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063383",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Scratchpad memory (SPM) is a promising on-chip memory
choice in real-time and cyber-physical systems where
timing is of the utmost importance. SPM has
time-predictable characteristics since its data
movement between the SPM and the main memory is
entirely managed by software. One way of such
management is dynamic management. In dynamic management
of instruction SPMs, code blocks are dynamically copied
from the main memory to the SPM at runtime by executing
direct memory access (DMA) instructions. Code
management techniques try to minimize the overhead of
DMA operations by finding an allocation scheme that
leads to efficient utilization. In this article, we
present three function-level code management
techniques. These techniques perform allocation at the
granularity of functions, with the objective of
minimizing the impact of DMA overhead to the worst-case
execution time (WCET) of a given program. The first
technique finds an optimal mapping of each function to
a region using integer linear programming (ILP),
whereas the second technique is a polynomial-time
heuristic that is suboptimal. The third technique maps
functions directly to SPM addresses, not using regions,
which can further reduce the WCET. Based on ILP, it can
also find an optimal mapping. We evaluate our
techniques using the M{\"a}lardalen WCET suite, MiBench
suite, and proprietary automotive applications from
industry. The results show that our techniques can
significantly reduce the WCET estimates compared to
caches with the state-of-the-art cache analysis.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "112",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:PNM,
author = "Guanjun Liu and Mengchu Zhou and Changjun Jiang",
title = "{Petri} Net Models and Collaborativeness for Parallel
Processes with Resource Sharing and Message Passing",
journal = j-TECS,
volume = "16",
number = "4",
pages = "113:1--113:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2810001",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Petri nets are widely used to model and analyse
concurrent systems. There exist two distinct classes of
Petri nets that focus on different features of
concurrent systems. The first one features multiple
parallel processes sharing a group of common resources
but not interacting/collaborating with each other. The
second one allows multiple parallel processes to
interact/collaborate with each other via message
exchange but does not share any common resources.
However, in many distributed environments, multiple
processes both interact/collaborate with each other and
share some common resources. To model and analyse such
systems, this article defines a new class of Petri nets
called Parallel Process Nets (P$^2$Ns) that may be
viewed as a generalization of the two mentioned above.
We propose collaborativeness and close
collaborativeness for P$^2$Ns. The former guarantees
that a modelled system is both deadlock-free and
livelock-free, and the latter guarantees that it is
deadlock-free, livelock-free, and starvation-free.
These concepts and ideas are illustrated through some
classical examples such as Producer-Consumer Problem
and Dining Philosophers Problem. Algorithms are
developed to decide them. At last, P$^2$Ns are applied
to the modelling and analysis of two real systems:
hospital information system and elevator scheduling
system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "113",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ciszewski:2017:EAC,
author = "Michal Ciszewski and Konrad Iwanicki",
title = "Efficient Automated Code Partitioning for
Microcontrollers with Switchable Memory Banks",
journal = j-TECS,
volume = "16",
number = "4",
pages = "114:1--114:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3055511",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Switching active memory banks at runtime allows a
processor with a narrow address bus to access memory
that exceeds ranges normally addressable via the bus.
Switching code memory banks is regaining interest in
microcontrollers for the Internet of Things (IoT),
which have to run continuously growing software, while
at the same time consuming ultra-small amounts of
energy. To make use of bank switching, such software
must be partitioned among the available banks and
augmented with bank-switching instructions. In contrast
to the augmenting, which is done automatically by a
compiler, today the partitioning is normally done
manually by programmers. However, since IoT software is
cross-compiled on much more powerful machines than its
target microcontrollers, it becomes possible to
partition it automatically during compilation. In this
article, we thus study the problem of partitioning
program code among banks such that the resulting
runtime performance of the program is maximized. We
prove that the problem is NP-hard and propose a
heuristic algorithm with a low complexity, so it
enables fast compilation and hence interactive software
development. The algorithm decomposes the problem into
three subproblems and introduces a heuristic for each
of them: (1) which pieces of code to partition, (2)
which of them to assign to permanently mapped banks,
and (3) how to divide the remaining ones among
switchable banks. We integrate the algorithm, together
with earlier ones, in an open-source compiler and test
the resulting solution on synthetic as well as actual
commercial IoT software bases, thereby demonstrating
its advantages and drawbacks. In particular, the
results show that the performance of partitions
produced by our algorithm comes close to that of
partitions created manually by programmers with expert
knowledge on the partitioned code.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "114",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liang:2017:EKM,
author = "Yun Liang and Xiuhong Li",
title = "Efficient Kernel Management on {GPUs}",
journal = j-TECS,
volume = "16",
number = "4",
pages = "115:1--115:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070710",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Graphics Processing Units (GPUs) have been widely
adopted as accelerators for compute-intensive
applications due to their tremendous computational power
and high memory bandwidth. As the complexity of
applications continues to grow, each new generation of
GPUs has been equipped with advanced architectural
features and more resources to sustain its performance
acceleration capability. Recent GPUs have been featured
with concurrent kernel execution, which is designed to
improve the resource utilization by executing multiple
kernels simultaneously. However, it is still a
challenge to find a way to manage the resources on GPUs
for concurrent kernel execution. Prior works only
achieve limited performance improvement as they do not
optimize the thread-level parallelism (TLP) and model
the resource contention for the concurrently executing
kernels. In this article, we design an efficient kernel
management framework that optimizes the performance for
concurrent kernel execution on GPUs. Our kernel
management framework contains two key components: TLP
modulation and cache bypassing. The TLP modulation is
employed to adjust the TLP for the concurrently
executing kernels. It consists of three parts: kernel
categorization, static TLP modulation, and dynamic TLP
modulation. The cache bypassing is proposed to mitigate
the cache contention by only allowing a subset of a
kernel's blocks to access the L1 data cache.
Experiments indicate that our framework can improve the
performance by $ 1.51 \times $ on average
(energy-efficiency by $ 1.39 \times $ on average),
compared with the default concurrent kernel execution
framework.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "115",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sun:2017:ESD,
author = "Yuliang Sun and Lanjun Wang and Chen Wang and Yu
Wang",
title = "Exploiting Stable Data Dependency in Stream Processing
Acceleration on {FPGAs}",
journal = j-TECS,
volume = "16",
number = "4",
pages = "116:1--116:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092950",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "With the unique feature of fine-grained parallelism,
field-programmable gate arrays (FPGAs) show great
potential for streaming algorithm acceleration.
However, the lack of a design framework, restrictions
on FPGAs, and ineffective tools impede the utilization
of FPGAs in practice. In this study, we provide a
design paradigm to support streaming algorithm
acceleration on FPGAs. We first propose an abstract
model to describe streaming algorithms with homogeneous
sub-functions (HSF) and stable data dependency (SDD),
which we call the HSF-SDD model. Using this model, we
then develop an FPGA framework, PE-Ring, that has the
advantages of (1) fully exploiting algorithm
parallelism to achieve high performance, (2) leveraging
block RAM to serve large scale parameters, and (3)
enabling flexible parameter adjustments. Based on the
proposed model and framework, we finally implement a
specific converter to generate the register-transfer
level representation of the PE-Ring. Experimental
results show that our method outperforms ordinary FPGA
design tools by one to two orders of magnitude.
Experiments also demonstrate the scalability of the
PE-Ring.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "116",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:HPI,
author = "Zhe Liu and Thomas P{\"o}ppelmann and Tobias Oder and
Hwajeong Seo and Sujoy Sinha Roy and Tim G{\"u}neysu
and Johann Gro{\ss}sch{\"a}dl and Howon Kim and Ingrid
Verbauwhede",
title = "High-Performance Ideal Lattice-Based Cryptography on
$8$-Bit {AVR} Microcontrollers",
journal = j-TECS,
volume = "16",
number = "4",
pages = "117:1--117:??",
month = aug,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092951",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Aug 14 18:53:33 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Over recent years lattice-based cryptography has
received much attention due to versatile average-case
problems like Ring-LWE or Ring-SIS that appear to be
intractable by quantum computers. In this work, we
evaluate and compare implementations of Ring-LWE
encryption and the bimodal lattice signature scheme
(BLISS) on an 8-bit Atmel ATxmega128 microcontroller.
Our implementation of Ring-LWE encryption provides
comprehensive protection against timing side-channels
and takes 24.9 ms for encryption and 6.7 ms for
decryption. To compute a BLISS signature, our software
takes 317 ms and 86 ms for verification. These results
underline the feasibility of lattice-based cryptography
on constrained devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "117",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Malik:2017:MCH,
author = "Avinash Malik and Partha S. Roop and Sidharta Andalam
and Mark Trew and Michael Mendler",
title = "Modular Compilation of Hybrid Systems for Emulation
and Large Scale Simulation",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "118:1--118:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126536",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Hybrid systems combine discrete controllers with
adjoining physical processes. While many approaches
exist for simulating hybrid systems, there are few
approaches for their emulation, especially when the
actual physical plant is not available. This paper
develops the first formal framework for emulation along
with a new compiler that enables large-scale (1000+
components) simulation. We propose a formal model
called Synchronous Emulation Automaton (SEA)
specifically for modular compilation and parallel
execution. SEA combines Linear Time Invariant (LTI)
systems with discrete mode switches and has the
following semantic differences with Hybrid Automata:
(1) the Ordinary Differential Equations are solved
analytically and the solutions are sampled at the
Worst-Case Reaction Time of the model and (2) we
develop a new composition semantics, which allows
individual SEAs to execute in parallel with each other.
The proposed semantics eliminates: (a) the need for
dynamic numerical solvers, and (b) the Zeno-phenomenon
by construction. Experimental results show that process
models designed using our tool (Piha) give a 3.6 times
execution speedup over Simulink\reg, and up to 26 times
speedup on manycore architectures.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "118",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Blindell:2017:CPU,
author = "Gabriel Hjort Blindell and Mats Carlsson and Roberto
Casta{\~n}eda Lozano and Christian Schulte",
title = "Complete and Practical Universal Instruction
Selection",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "119:1--119:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126528",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In code generation, instruction selection chooses
processor instructions to implement a program under
compilation where code quality crucially depends on the
choice of instructions. Using methods from
combinatorial optimization, this paper proposes an
expressive model that integrates global instruction
selection with global code motion. The model introduces
(1) handling of memory computations and function calls,
(2) a method for inserting additional jump instructions
where necessary, (3) a dependency-based technique to
ensure correct combinations of instructions, (4) value
reuse to improve code quality, and (5) an objective
function that reduces compilation time and increases
scalability by exploiting bounding techniques. The
approach is demonstrated to be complete and practical,
competitive with LLVM, and potentially optimal (w.r.t.
the model) for medium-sized functions. The results show
that combinatorial optimization for instruction
selection is well-suited to exploit the potential of
modern processors in embedded systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "119",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Su:2017:EWA,
author = "Xuesong Su and Hui Wu and Jingling Xue",
title = "An Efficient {WCET}-Aware Instruction Scheduling and
Register Allocation Approach for Clustered {VLIW}
Processors",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "120:1--120:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126524",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In real-time embedded system design, one major goal is
to construct a feasible schedule. Whether a feasible
schedule exists depends on the Worst-Case Execution
Time (WCET) of each task. Consequently, it is important
to minimize the WCET of each task. We investigate the
problem of instruction scheduling and register
allocation for a program executed on a clustered Very
Long Instruction Word (VLIW) processor such that the
WCET of the program is minimized, and propose a novel,
unified instruction scheduling and register allocation
heuristic approach. Our heuristic approach is
underpinned by a set of novel techniques, including
spanning graph-based WCET-aware live range splitting,
WCET-aware dynamic register pressure control,
WCET-aware basic block prioritization for performing
integrated instruction scheduling and register
allocation, and WCET-aware spill code handling. We have
implemented our approach in Trimaran 4.0, and compared
it with the state-of-the-art approach by using a set of
20 benchmarks. The experimental results show that our
approach achieves the maximum WCET improvement of
29.61\% and the average WCET improvement of 10.23\%,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "120",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Micolet:2017:SDP,
author = "Paul-Jules Micolet and Aaron Smith and Christophe
Dubach",
title = "A Study of Dynamic Phase Adaptation Using a Dynamic
Multicore Processor",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "121:1--121:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126523",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Heterogeneous processors such as ARM's big.LITTLE have
become popular for embedded systems. They offer a
choice between running workloads on a high performance
core or a low-energy core leading to increased energy
efficiency. However, the core configurations are fixed
at design time which offers a limited amount of
adaptation. Dynamic Multicore Processors (DMPs) bridge
the gap between homogeneous and fully reconfigurable
systems. Cores can fuse dynamically to adapt the
computational resources to the needs of different
workloads. There exist multiple examples of DMPs in
the literature, yet the focus has mainly been on static
partitioning. This paper conducts the first thorough
study of the potential for dynamic reconfiguration of
DMPs at runtime. We study how performance varies with
static partitioning and what software optimizations are
required to achieve high performance. We show that
energy consumption is reduced considerably when
adapting the number of cores to program phases, and
introduce a simple online model which predicts the
optimal number of cores to use to minimize energy
consumption while maintaining high performance. Using
the San Diego Vision Benchmark Suite as a use case, the
dynamic scheme leads to $ \approx $40\% energy savings
on average without decreasing performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "121",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Trub:2017:IPM,
author = "Roman Tr{\"u}b and Georgia Giannopoulou and Andreas
Tretter and Lothar Thiele",
title = "Implementation of Partitioned Mixed-Criticality
Scheduling on a Multi-Core Platform",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "122:1--122:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126533",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recent industrial trends favor the adoption of
multi-core architectures for mixed-criticality
applications. Although several mixed-criticality
multi-core scheduling approaches have been proposed,
currently there are few implementations on hardware
that demonstrate efficient resource utilization and the
ability to bound interference on shared resources. To
address this necessity, we develop a mixed-criticality
runtime environment on the Kalray MPPA-256 Andey
many-core platform. The runtime environment implements
a scheduling policy based on adaptive temporal
partitioning. We develop models, methods and
implementation principles to implement the necessary
scheduling primitives, to achieve high platform
utilization and to perform a compositional worst-case
execution time analysis. The bounds account for
scheduling overheads and for the inter-task
interference on the platform's shared memory. Using
realistic benchmarks from avionics and signal
processing, we validate the correctness and tightness
of the bounds and demonstrate a high platform
utilization.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "122",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gupta:2017:DDP,
author = "Ujjwal Gupta and Chetan Arvind Patil and Ganapati Bhat
and Prabhat Mishra and Umit Y. Ogras",
title = "{DyPO}: Dynamic {Pareto}-Optimal Configuration
Selection for Heterogeneous {MpSoCs}",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "123:1--123:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126530",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modern multiprocessor systems-on-chip (MpSoCs) offer
tremendous power and performance optimization
opportunities by tuning thousands of potential voltage,
frequency and core configurations. As the workload
phases change at runtime, different configurations may
become optimal with respect to power, performance or
other metrics. Identifying the optimal configuration at
runtime is infeasible due to the large number of
workloads and configurations. This paper proposes a
novel methodology that can find the Pareto-optimal
configurations at runtime as a function of the
workload. To achieve this, we perform an extensive
offline characterization to find classifiers that map
performance counters to optimal configurations. Then,
we use these classifiers and performance counters at
runtime to choose Pareto-optimal configurations. We
evaluate the proposed methodology by maximizing the
performance per watt for 18 single- and multi-threaded
applications. Our experiments demonstrate an average
increase of 93\%, 81\% and 6\% in performance per watt
compared to the interactive, on demand and powersave
governors, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "123",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Naresh:2017:CCC,
author = "Vignyan Reddy Kothinti Naresh and Dibakar Gope and
Mikko H. Lipasti",
title = "The {CURE}: Cluster Communication Using Registers",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "124:1--124:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126527",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "VLIW processors typically deliver high performance on
limited budget making them ideal for a variety of
communication and signal processing solutions. These
processors typically need large multi-ported register
files that can have side effects of increased cycle
time and high power consumption. The access delay and
energy of these register files can also become
prohibitive when increasing the register count or the
access ports, thus limiting the overall performance of
the processor. Most prior art circumvent this problem
by using multiple clusters with private register files,
to lower the access delay and reduce energy
consumption. However, clustering artifacts, like
increased inter-cluster communication operations and
spill-recovery code, result in a performance penalty.
This paper proposes CURE --- a novel technique to
considerably reduce the negative effects of clustering.
CURE augments the ISA to expose the communication
registers to the compilers to increase availability of
architectural register state to all functional units.
The inter-cluster communication operations are
integrated into regular ALU and memory operations to
improve instruction encoding efficiency. We also
propose a new code scheduling heuristic to handle the
ISA changes, and to realize the improvements in
processor's performance and energy consumption. Our
quantitative analysis estimates that CURE, when
compared to the baseline 8-issue uni-cluster
processor, boosts average performance by 61\% while
reducing the average register dynamic energy by 77\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "124",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Josipovic:2017:OLS,
author = "Lana Josipovic and Philip Brisk and Paolo Ienne",
title = "An Out-of-Order Load-Store Queue for Spatial
Computing",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "125:1--125:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126525",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The efficiency of spatial computing depends on the
ability to achieve maximal parallelism. This
necessitates memory interfaces that can correctly
handle memory accesses that arrive in arbitrary order
while still respecting data dependencies and ensuring
appropriate ordering for semantic correctness. However,
a typical memory interface for out-of-order processors
(i.e., a load-store queue) cannot immediately meet
these requirements: a different allocation policy is
needed to achieve out-of-order execution in spatial
systems that naturally omit the notion of sequential
program order, a fundamental piece of information for
correct execution. We show a novel and practical way to
organize the allocation for an out-of-order load-store
queue for spatial computing. The main idea is to
dynamically allocate groups of memory accesses
(depending on the dynamic behavior of the application),
where the access order within the group is statically
predetermined (for instance by a high-level synthesis
tool). We detail the construction of our load-store
queue and demonstrate on a few practical cases its
advantages over standard accelerator-memory
interfaces.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "125",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Crites:2017:DCE,
author = "Brian Crites and Karen Kong and Philip Brisk",
title = "Diagonal Component Expansion for Flow-Layer Placement
of Flow-Based Microfluidic Biochips",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "126:1--126:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126529",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Continuous flow-based microfluidic devices have seen a
huge increase in interest because of their ability to
automate and miniaturize biochemistry and biological
processes, as well as their promise of creating a
programmable platform for chemical and biological
experimentation. The major hurdle in the adoption of
these types of devices is in the design, which is
largely done by hand using tools such as AutoCAD or
SolidWorks, which require immense domain knowledge and
are hard to scale. This paper investigates the problem
of automated physical design for continuous flow-based
microfluidic very large scale integration (mVLSI)
biochips, starting from a netlist specification of the
flow layer. After an initial planar graph embedding,
vertices in the netlist are expanded into
two-dimensional components, followed by fluid channel
routing. A new heuristic, DIagonal Component Expansion
(DICE) is introduced for the component expansion step.
Compared to a baseline expansion method, DICE improves
area utilization by a factor of 8.90x and reduces
average fluid routing channel length by 47.4\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "126",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Elfar:2017:SER,
author = "Mahmoud Elfar and Zhanwei Zhong and Zipeng Li and
Krishnendu Chakrabarty and Miroslav Pajic",
title = "Synthesis of Error-Recovery Protocols for
Micro-Electrode-Dot-Array Digital Microfluidic
Biochips",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "127:1--127:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126538",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A digital microfluidic biochip (DMFB) is an attractive
technology platform for various biomedical
applications. However, a conventional DMFB is limited
by: (i) the number of electrical connections that can
be practically realized, (ii) constraints on droplet
size and volume, and (iii) the need for special
fabrication processes and the associated
reliability/yield concerns. To overcome the above
challenges, DMFBs based on a micro-electrode-dot-array
(MEDA) architecture have been proposed and fabricated
recently. Error recovery is of key interest for MEDA
biochips due to the need for system reliability. Errors
are likely to occur during droplet manipulation due to
defects, chip degradation, and the uncertainty inherent
in biochemical experiments. In this paper, we first
formalize error-recovery objectives, and then
synthesize optimal error-recovery protocols using a
model based on Stochastic Multiplayer Games (SMGs). We
also present a global error-recovery technique that can
update the schedule of fluidic operations in an
adaptive manner. Using three representative real-life
bioassays, we show that the proposed approach can
effectively reduce the bioassay completion time and
increase the probability of success for error
recovery.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "127",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gottscho:2017:LCM,
author = "Mark Gottscho and Irina Alam and Clayton Schoeny and
Lara Dolecek and Puneet Gupta",
title = "Low-Cost Memory Fault Tolerance for {IoT} Devices",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "128:1--128:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126534",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "IoT devices need reliable hardware at low cost. It is
challenging to efficiently cope with both hard and soft
faults in embedded scratchpad memories. To address this
problem, we propose a two-step approach: FaultLink and
Software-Defined Error-Localizing Codes (SDELC).
FaultLink avoids hard faults found during testing by
generating a custom-tailored application binary image
for each individual chip. During software
deployment-time, FaultLink optimally packs small
sections of program code and data into fault-free
segments of the memory address space and generates a
custom linker script for a lazy-linking procedure.
During run-time, SDELC deals with unpredictable soft
faults via novel and inexpensive Ultra-Lightweight
Error-Localizing Codes (UL-ELCs). These require fewer
parity bits than single-error-correcting Hamming codes.
Yet our UL-ELCs are more powerful than basic
single-error-detecting parity: they localize single-bit
errors to a specific chunk of a codeword. SDELC then
heuristically recovers from these localized errors
using a small embedded C library that exploits
observable side information (SI) about the
application's memory contents. SI can be in the form of
redundant data (value locality), legal/illegal
instructions, etc. Our combined FaultLink+SDELC
approach improves min-VDD by up to 440 mV and correctly
recovers from up to 90\% (70\%) of random single-bit
soft faults in data (instructions) with just three
parity bits per 32-bit word.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "128",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yantir:2017:AMM,
author = "Hasan Erdem Yantir and Ahmed M. Eltawil and Fadi J.
Kurdahi",
title = "Approximate Memristive In-memory Computing",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "129:1--129:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126526",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The bottleneck between the processing elements and
memory is the biggest issue contributing to the
scalability problem in computing. In-memory computation
is an alternative approach that combines memory and
processor in the same location, and eliminates the
potential memory bottlenecks. Associative processors
are a promising candidate for in-memory computation,
however the existing implementations have been deemed
too costly and power hungry. Approximate computing is
another promising approach for energy-efficient digital
system designs where it sacrifices the accuracy for the
sake of energy reduction and speedup in error-resilient
applications. In this study, approximate in-memory
computing is introduced in memristive associative
processors. Two approximate computing methodologies are
proposed; bit trimming and memristance scaling. Results
show that the proposed methods not only reduce energy
consumption of in-memory parallel computing but also
improve their performance. As compared to other
existing approximate computing methodologies on
different architectures (e.g., CPU, GPU, and ASIC),
approximate memristive in-memory computing exhibits
better results in terms of energy reduction (up to 80x)
and speedup (up to 20x) on a variety of benchmarks from
different domains when quality degradation is limited
to 10\% and it confirms that memristive associative
processors provide a highly-promising platform for
approximate computing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "129",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Raha:2017:QIA,
author = "Arnab Raha and Vijay Raghunathan",
title = "{qLUT}: Input-Aware Quantized Table Lookup for
Energy-Efficient Approximate Accelerators",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "130:1--130:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126531",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Approximate computing has emerged as a popular design
paradigm for optimizing the performance and energy
consumption of error-resilient applications in domains
such as machine learning, graphics, data analytics,
etc. Numerous techniques for approximate computing have
been proposed at different layers of the system stack,
from circuits to architecture to software. In this
work, we propose a new technique, called quantized
table lookup, for approximating the meta-functions used
in the core computational kernels of error-resilient
applications. In contrast to prior work that directly
approximates the functionality of the meta-functions,
the proposed technique instead approximates the input
data to the meta-functions by reducing/quantizing them
to a much smaller set of values that we call quantized
inputs. The small number of quantized inputs enables us
to completely replace the energy-intensive arithmetic
units in the meta-function with small and
energy-efficient lookup tables (called quantized lookup
tables or qLUT) that contain precomputed output values
corresponding to the quantized inputs. The proposed
approximation technique is not only highly generic, but
also inherently quality-configurable and input-aware.
Quality-configurability and input-awareness are
achieved by modulating the size of the qLUT as well as
selecting the values of the quantized inputs
judiciously based on the statistics of the original
input data. To evaluate the proposed technique, we have
implemented the dominant meta-functions of nine
error-resilient application benchmarks as quantized
table lookup based hardware accelerators using 45nm
technology. Experimental results demonstrate average
energy savings of 46\% at the application-level for
minimal ($<$ 1\%) loss in output quality.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "130",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Egilmez:2017:UAF,
author = "Begum Egilmez and Matthew Schuchhardt and Gokhan Memik
and Raid Ayoub and Niranjan Soundararajan and Michael
Kishinevsky",
title = "User-aware Frame Rate Management in {Android}
Smartphones",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "131:1--131:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126539",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Frame rate has a direct impact on the energy
consumption of smartphones: the higher the frame rate,
the higher the power consumption. Hence, reducing
display refreshes will reduce the power consumption.
However, it is risky to manipulate frame rate
drastically as it can deteriorate user satisfaction
with the device. In this work, we introduce a screen
management system that controls the frame rate on
smartphone displays based on a model that detects user
dissatisfaction due to display refreshes. This approach
is based on understanding when higher frame rates are
necessary, and providing lower frame rates --- thus,
saving power --- if the lower rate is predicted not to
cause user dissatisfaction. According to the results of
our first user survey with 20 participants, individuals
show highly varying requirements: while some users
require high frame rates for the highest satisfaction,
others are equally satisfied with lower frame rates.
Based on this observation, we develop a system that
predicts user dissatisfaction on the runtime and either
increases or decreases the maximum frame rate setting.
For user dissatisfaction predictions, we have compared
two different approaches: (1) static model, which uses
dissatisfaction characteristics of a fixed group of
people, and (2) user-specific model, which is learning
only from the specific user. Our second set of
experiments with 20 participants shows that users
report 32\% less dissatisfaction and 4\% more
dissatisfaction than the default Android system with
user-specific and static systems, respectively. These
experiments also show that, compared to the default
scheme, our mechanisms reduce the power consumption of
the phone by 7.2\% and 1.8\% on average with the
user-specific and static models, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "131",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yan:2017:FFI,
author = "Hao Yan and Lei Jiang and Lide Duan and Wei-Ming Lin
and Eugene John",
title = "{FlowPaP} and {FlowReR}: Improving Energy Efficiency
and Performance for {STT-MRAM}-Based Handheld Devices
under Read Disturbance",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "132:1--132:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126532",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Handheld devices, such as smartphones and tablets,
currently dominate the semiconductor market. The memory
access patterns of CPU and IP cores are dramatically
different in a handheld device, making the main memory
a critical bottleneck of the entire system. As a
result, non-volatile memories, such as spin transfer
torque magnetoresistive random-access memory
(STT-MRAM), are emerging as a replacement for the
existing DRAM-based main memory, achieving a wide
variety of advantages. However, replacing DRAM with
STT-MRAM also results in new design challenges
including read disturbance. A simple read-and-restore
scheme preserves data integrity under read disturbance,
but incurs significant performance and energy
overheads. Consequently, by utilizing unique
characteristics of mobile applications, we propose
FlowPaP, a flow pattern prediction scheme to
dynamically predict the write-to-last-read distances
for data frames running on a handheld device. FlowPaP
identifies and removes unnecessary memory restores
originally required for preventing read disturbance,
significantly improving energy efficiency and
performance for STT-MRAM-based handheld devices. In
addition, we propose a flow-based data retention time
reduction scheme named FlowReR to further lower energy
consumption of STT-MRAM at the expense of reducing its
data retention time. FlowReR imposes a second step that
marginally trades off the already improved energy
efficiency for performance improvements. Experimental
results show that, compared to the original
read-and-restore scheme, the application of FlowPaP and
FlowReR together can simultaneously improve energy
efficiency by 34\% and performance by 17\% for a set of
commonly used Android applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "132",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rai:2017:UCG,
author = "Siddharth Rai and Mainak Chaudhuri",
title = "Using Criticality of {GPU} Accesses in Memory
Management for {CPU--GPU} Heterogeneous Multi-Core
Processors",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "133:1--133:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126540",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Heterogeneous chip-multiprocessors with CPU and GPU
integrated on the same die allow sharing of critical
memory system resources among the CPU and GPU
applications. Such architectures give rise to
challenging resource scheduling problems. In this
paper, we explore memory access scheduling algorithms
driven by criticality of GPU accesses in such systems.
Different GPU access streams originate from different
parts of the GPU rendering pipeline, which behaves very
differently from the typical CPU pipeline requiring new
techniques for GPU access criticality estimation. We
propose a novel queuing network model to estimate the
performance-criticality of the GPU access streams. If a
GPU application performs below the quality of service
requirement (e.g., frame rate in 3D scene rendering),
the memory access scheduler uses the estimated
criticality information to accelerate the critical GPU
accesses. Detailed simulations done on a heterogeneous
chip-multiprocessor model with one GPU and four CPU
cores running heterogeneous mixes of DirectX, OpenGL,
and CPU applications show that our proposal improves
the GPU performance by 15\% on average without
degrading the CPU performance much. Extensions proposed
for the mixes containing GPGPU applications, which do
not have any quality of service requirement, improve
the performance by 7\% on average for these mixes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "133",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kang:2017:RLA,
author = "Wonkyung Kang and Dongkun Shin and Sungjoo Yoo",
title = "Reinforcement Learning-Assisted Garbage Collection to
Mitigate Long-Tail Latency in {SSD}",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "134:1--134:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126537",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "NAND flash memory is widely used in various systems,
ranging from real-time embedded systems to enterprise
server systems. Because the flash memory has
erase-before-write characteristics, we need
flash-memory management methods, i.e., address
translation and garbage collection. In particular,
garbage collection (GC) incurs long-tail latency, e.g.,
100 times higher latency than the average latency at
the 99th percentile. Thus, real-time and
quality-critical systems fail to meet the given
requirements such as deadline and QoS constraints. In
this study, we propose a novel method of GC based on
reinforcement learning. The objective is to reduce the
long-tail latency by exploiting the idle time in the
storage system. To improve the efficiency of the
reinforcement learning-assisted GC scheme, we present
new optimization methods that exploit fine-grained GC
to further reduce the long-tail latency. The
experimental results with real workloads show that our
technique significantly reduces the long-tail latency
by 29--36\% at the 99.99th percentile compared to
state-of-the-art schemes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "134",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tretter:2017:MAC,
author = "Andreas Tretter and Georgia Giannopoulou and Matthias
Baer and Lothar Thiele",
title = "Minimising Access Conflicts on Shared Multi-Bank
Memory",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "135:1--135:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126535",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A common multi-core pattern consists of processors
communicating through shared, multi-banked on-chip
memory. Two approaches exist: Interleaved address
mapping, which spreads consecutive data over all banks,
and contiguous address mapping, which stores
consecutive data on a single bank. In this work, we
compare both approaches on the Kalray MPPA-256
platform. For contiguous mapping, we propose an
algorithm, based on graph colouring techniques, to
automatically perform the assignment of data blocks to
memory banks with the goal of minimising access
collisions and delays. Experiments with representative,
parallel real-world benchmarks show that 69\% of the
tested configurations, when optimised for contiguous
mapping by our algorithm, run up to 86\% faster on
average than with interleaved mapping.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "135",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Riazi:2017:CSC,
author = "M. Sadegh Riazi and Mohammad Samragh and Farinaz
Koushanfar",
title = "{CAMsure}: Secure Content-Addressable Memory for
Approximate Search",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "136:1--136:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126547",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We introduce CAMsure, the first realization of secure
Content Addressable Memory (CAM) in the context of
approximate search using near-neighbor algorithms.
CAMsure provides a lightweight solution for practical
secure (approximate) search with a minimal drop in the
accuracy of the search results. CAM has traditionally
been used as a hardware search engine that explores the
entire memory in a single clock cycle. However, there
has been little attention to the security of the data
stored in CAM. Our approach stores distance-preserving
hash embeddings within CAM to ensure data privacy. The
hashing method provides data confidentiality while
preserving similarity in the sense that a high
resemblance in the data domain is translated to a small
Hamming distance in the hash domain. Consequently, the
objective of near-neighbor search is converted to
approximate lookup table search which is compatible
with the realizations of emerging content addressable
memories. Our methodology delivers on average two
orders of magnitude faster response time compared to
RAM-based solutions that preserve the privacy of data
owners.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "136",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Piccolboni:2017:ECF,
author = "Luca Piccolboni and Alessandro Menon and Graziano
Pravadelli",
title = "Efficient Control-Flow Subgraph Matching for Detecting
Hardware {Trojans} in {RTL} Models",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "137:1--137:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126552",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Only few solutions for Hardware Trojan (HT) detection
work at Register-Transfer Level (RTL), thus delaying
the identification of possible security issues at lower
abstraction levels of the design process. In addition,
the most of existing approaches work only for specific
kinds of HTs. To overcome these limitations, we present
a verification approach that detects different types of
HTs in RTL models by exploiting an efficient
control-flow subgraph matching algorithm. The
prototypes of HTs that can be detected are modelled in
a library by using Control-Flow Graphs (CFGs) that can
be parametrised and extended to cover several variants
of Trojan patterns. Experimental results show that our
approach is effective and efficient in comparison with
other state-of-the-art solutions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "137",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Migliore:2017:HSA,
author = "Vincent Migliore and C{\'e}dric Seguin and Maria
M{\'e}ndez Real and Vianney Lapotre and Arnaud
Tisserand and Caroline Fontaine and Guy Gogniat and
Russell Tessier",
title = "A High-Speed Accelerator for Homomorphic Encryption
using the {Karatsuba} Algorithm",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "138:1--138:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126558",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Somewhat Homomorphic Encryption (SHE) schemes can be
used to carry out operations on ciphered data. In a
cloud computing scenario, personal information can be
processed secretly, inferring a high level of
confidentiality. The principle limitation of SHE is the
size of ciphertext compared to the size of the message.
This issue can be addressed by using a batching
technique that ``packs'' several messages into one
ciphertext. However, this method leads to important
drawbacks in standard implementations. This paper
presents a fast hardware/software co-design
implementation of an encryption procedure using the
Karatsuba algorithm. Our hardware accelerator is 1.5
times faster than the state of the art for 1 encryption
and 4 times faster for 4 encryptions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "138",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2017:FAK,
author = "Jiacheng Zhang and Youyou Lu and Jiwu Shu and Xiongjun
Qin",
title = "{FlashKV}: Accelerating {KV} Performance with
Open-Channel {SSDs}",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "139:1--139:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126545",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "As the cost-per-bit of solid state disks is decreasing
quickly, SSDs are supplanting HDDs in many cases,
including the primary storage of key-value stores.
However, simply deploying LSM-tree-based key-value
stores on commercial SSDs is inefficient and induces
heavy write amplification and severe garbage collection
overhead under write-intensive conditions. The main
cause of these critical issues comes from the triple
redundant management functionalities lying in the
LSM-tree, file system and flash translation layer,
which block the awareness between key-value stores and
flash devices. Furthermore, we observe that the
performance of LSM-tree-based key-value stores is
improved little by only eliminating these redundant
layers, as the I/O stacks, including the cache and
scheduler, are not optimized for LSM-tree's unique I/O
patterns. To address the issues above, we propose
FlashKV, an LSM-tree based key-value store running on
open-channel SSDs. FlashKV eliminates the redundant
management and semantic isolation by directly managing
the raw flash devices in the application layer. With
the domain knowledge of LSM-tree and the open-channel
information, FlashKV employs a parallel data layout to
exploit the internal parallelism of the flash device,
and optimizes the compaction, caching and I/O
scheduling mechanisms specifically. Evaluations show
that FlashKV effectively improves system performance by
$ 1.5 \times $ to $ 4.5 \times $ and decreases up to
50\% write traffic under heavy write conditions,
compared to LevelDB.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "139",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2017:PBB,
author = "Hong Seok Kim and Eyee Hyun Nam and Ji Hyuck Yun and
Sheayun Lee and Sang Lyul Min",
title = "{P-BMS}: a Bad Block Management Scheme in Parallelized
Flash Memory Storage Devices",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "140:1--140:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126550",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Flash memory is used as a main data storage medium in
increasingly large areas of applications, rapidly
replacing hard disk drives because of its low power
consumption, fast random access, and high shock
resistance. Such flash-based storage devices generally
incorporate multiple flash memory chips to meet the
ever growing capacity demands. Using multiple chips in
a single storage device, at the same time, opens an
opportunity to boost the performance based on
multi-unit parallelism. However, parallel execution of
multiple flash operations introduces complications when
bad blocks occur, which is unavoidable due to flash
memory's physical characteristics. The situation gets
even worse when bad block occurrences are accompanied
by sudden power failures. We propose a bad block
management scheme called P-BMS that can fully utilize
flash-level parallelism, while guaranteeing provably
correct block replacement. Experiments show that our
P-BMS achieves a throughput that is more than 95\% of
the maximum bandwidth of the flash controller, even
with bad block occurrences far heavier than in real
flash memory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "140",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2017:PIE,
author = "Fei Wu and Meng Zhang and Yajuan Du and Xubin He and
Ping Huang and Changsheng Xie and Jiguang Wan",
title = "A Program Interference Error Aware {LDPC} Scheme for
Improving {NAND} Flash Decoding Performance",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "141:1--141:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126563",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "By scaling down to smaller cell size, NAND flash has
significantly increased the storage capacity in order
to lower the unit cost down. However, the reliability
is sacrificed due to much higher raw bit error rates.
As a result, conventional error correction codes
(ECCs), such as BCH codes, are not sufficient.
Low-density parity check (LDPC) codes with stronger
error correction capability are adopted in NAND flash
to guarantee data reliability. However, read
performance using LDPC is poor because of its decoding
complexity. It has been found that flash cells with
fewer electrons are more prone to program interference
errors. As a result, program interference errors show
the characteristic of value dependence. This
characteristic can be exploited and translated into
extra information facilitating the decoding
convergence. Motivated by this observation, we propose
PEAL: a flash program interference error aware LDPC
scheme to enhance the decoding performance. PEAL
integrates the obtained extra information from the
value dependence into the soft-to-hard decision process
in LDPC decoding to decrease decoding iterations and
improve the decoding convergence speed. Simulation
results show that decoding iterations are reduced by up
to 69.37\% and the decoding convergence speed is
improved by up to $ 2.5 \times $, compared with the
normalized min-sum (NMS) algorithm with 2KB information
lengths at an approximate raw bit error rate of $ 11.5
\times 10^{-3} $.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "141",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2017:PAP,
author = "Yi Wang and Lisha Dong and Rui Mao",
title = "{P-Alloc}: Process-Variation Tolerant Reliability
Management for {$3$D} Charge-Trapping Flash Memory",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "142:1--142:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126554",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Three-dimensional (3D) flash memory is an emerging
memory technology that enables a number of improvements
to conventional planar NAND flash memory, including
larger capacity, less program disturbance, and lower
access latency. In contrast to conventional planar
flash memory, 3D flash memory adopts charge-trapping
mechanism. NAND strings punch through multiple stacked
layers to form the three-dimensional infrastructure.
However, the etching processes for NAND strings are
unable to produce perfectly vertical features,
especially on the scale of 20 nanometers or less. The
process variation will cause uneven distribution of
electrons, which poses a threat to the integrity of
data stored in flash. This paper presents P-Alloc, a
process-variation tolerant reliability management
strategy for 3D charge-trapping flash memory. P-Alloc
offers both hardware and software support to allocate
data to the 3D flash in the presence of process
variation. P-Alloc predicts the state of a physical
page, i.e., the basic unit for each write or read
operation in flash memory, and tries to assign critical
data to more reliable pages. A hardware-based voltage
threshold compensation scheme is also proposed to
further reduce the faults. We demonstrate the viability
of the proposed scheme using a variety of realistic
workloads. Our extensive evaluations show that P-Alloc
significantly enhances the reliability and reduces the
access latency compared to the baseline scheme.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "142",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tan:2017:ASA,
author = "Benjamin Tan and Morteza Biglari-Abhari and Zoran
Salcic",
title = "An Automated Security-Aware Approach for Design of
Embedded Systems on {MPSoC}",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "143:1--143:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126553",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "MPSoC-based embedded systems design is becoming
increasingly complex. Not only do we need to satisfy
multiple design objectives, we increasingly need to
address potential security risks. In this work, we
propose a security-aware systematic design approach
which explores the design space, given a system-level
application description, by generating potential
architecture configurations of execution platform nodes
that are interconnected using a NoC. We then perform
automated security analysis to check the generated
configurations against designer-specified security
constraints. Following the analysis, we use an
automated architecture configuration refinement process
to generate a list of security additions that are
inserted into the initial configuration so that the
security constraints are satisfied. By performing this
refinement on several candidate configuration options,
we can explore the trade-off between resource cost and
security. In this paper, we illustrate the proposed
approach using a Smart Home Control System
application.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "143",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tsoutsouras:2017:SSO,
author = "Vasileios Tsoutsouras and Dimosthenis Masouros and
Sotirios Xydis and Dimitrios Soudris",
title = "{SoftRM}: Self-Organized Fault-Tolerant Resource
Management for Failure Detection and Recovery in {NoC}
Based Many-Cores",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "144:1--144:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126562",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many-core systems are envisioned to leverage the
ever-increasing demand for more powerful computing
systems. To provide the necessary computing power, the
number of Processing Elements integrated on-chip
increases and NoC based infrastructures are adopted to
address the interconnection scalability. The advent of
these new architectures surfaces the need for more
sophisticated, distributed resource management
paradigms, which in addition to the extreme integration
scaling, make the new systems more prone to errors
manifested both at hardware and software. In this work,
we highlight the need for Run-Time Resource management
to be enhanced with fault tolerance features and
propose SoftRM, a resource management framework which
can dynamically adapt to permanent failures in a
self-organized, workload-aware manner.
Self-organization allows the resource management agents
to recover from a failure in a coordinated way by
electing a new agent to replace the failed one, while
workload awareness optimizes this choice according to
the status of each core. We evaluate the proposed
framework on Intel Single-chip Cloud Computer (SCC), a
NoC based many-core system and customize it to achieve
minimum interference on the resource allocation
process. We showcase that its workload-aware features
manage to utilize free resources in more than 90\% of
the conducted experiments. Comparison with relevant
state-of-the-art fault tolerant frameworks shows
decrease of up to 67\% in the imposed overhead on
application execution.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "144",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bhat:2017:PTS,
author = "Ganapati Bhat and Suat Gumussoy and Umit Y. Ogras",
title = "Power-Temperature Stability and Safety Analysis for
Multiprocessor Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "145:1--145:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126567",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modern multiprocessor system-on-chips (SoCs) integrate
multiple heterogeneous cores to achieve high energy
efficiency. The power consumption of each core
contributes to an increase in the temperature across
the chip floorplan. In turn, higher temperature
increases the leakage power exponentially, and leads to
a positive feedback with nonlinear dynamics. This paper
presents a power-temperature stability and safety
analysis technique for multiprocessor systems. This
analysis reveals the conditions under which the
power-temperature trajectory converges to a stable
fixed point. We also present a simple formula to
compute the stable fixed point and maximum
thermally-safe power consumption at runtime. Hardware
measurements on a state-of-the-art mobile processor
show that our analytical formulation can predict the
stable fixed point with an average error of 2.6\%.
Hence, our approach can be used at runtime to ensure
thermally safe operation and guard against thermal
threats.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "145",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2017:CEG,
author = "Siqi Wang and Guanwen Zhong and Tulika Mitra",
title = "{CGPredict}: Embedded {GPU} Performance Estimation
from Single-Threaded Applications",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "146:1--146:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126546",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Heterogeneous multiprocessor system-on-chip
architectures are endowed with accelerators such as
embedded GPUs and FPGAs capable of general-purpose
computation. The application developers for such
platforms need to carefully choose the accelerator with
the maximum performance benefit. For a given
application, usually, the reference code is specified
in a high-level single-threaded programming language
such as C. The performance of an application kernel on
an accelerator is a complex interplay among the exposed
parallelism, the compiler, and the accelerator
architecture. Thus, determining the performance of a
kernel requires its redevelopment into each
accelerator-specific language, causing substantial
wastage of time and effort. To aid the developer in
this early design decision, we present an analytical
framework CGPredict to predict the performance of a
computational kernel on an embedded GPU architecture
from un-optimized, single-threaded C code. The
analytical approach provides insights on application
characteristics which suggest further
application-specific optimizations. The estimation
error is as low as 2.66\% (average 9\%) compared to the
performance of the same kernel written in native CUDA
code running on NVIDIA Kepler embedded GPU. This low
performance estimation error enables CGPredict to
provide an early design recommendation of the
accelerator starting from C code.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "146",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Singh:2017:EER,
author = "Amit Kumar Singh and Alok Prakash and Karunakar Reddy
Basireddy and Geoff V. Merrett and Bashir M.
Al-Hashimi",
title = "Energy-Efficient Run-Time Mapping and Thread
Partitioning of Concurrent {OpenCL} Applications on
{CPU--GPU MPSoCs}",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "147:1--147:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126548",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Heterogeneous Multi-Processor Systems-on-Chips
(MPSoCs) containing CPU and GPU cores are typically
required to execute applications concurrently. However,
as will be shown in this paper, existing approaches are
not well suited for concurrent applications as they are
developed either by considering only a single
application or they do not exploit both CPU and GPU
cores at the same time. In this paper, we propose an
energy-efficient run-time mapping and thread
partitioning approach for executing concurrent OpenCL
applications on both CPU and GPU cores while satisfying
performance requirements. Depending upon the
performance requirements, for each concurrently
executing application, the mapping process finds the
appropriate number of CPU cores and operating
frequencies of CPU and GPU cores, and the partitioning
process identifies an efficient partitioning of the
applications' threads between CPU and GPU cores. We
validate the proposed approach experimentally on the
Odroid-XU3 hardware platform with various mixes of
applications from the Polybench benchmark suite.
Additionally, a case-study is performed with a
real-world application SLAMBench. Results show an
average energy saving of 32\% compared to existing
approaches while still satisfying the performance
requirements.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "147",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Oneal:2017:GPE,
author = "Kenneth O'Neal and Philip Brisk and Ahmed Abousamra
and Zack Waters and Emily Shriver",
title = "{GPU} Performance Estimation using Software
Rasterization and Machine Learning",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "148:1--148:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126557",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This paper introduces a predictive modeling framework
to estimate the performance of GPUs during pre-silicon
design. Early-stage performance prediction is useful
when simulation times impede development by rendering
driver performance validation, API conformance testing
and design space explorations infeasible. Our approach
builds a Random Forest regression model to analyze
DirectX 3D workload behavior when executed by a
software rasterizer, which we have extended with a
workload characterizer to collect further performance
information via program counters. In addition to
regression models, this work produces detailed feature
rankings which can provide valuable architectural
insight, and accurate performance estimates for an
Intel integrated Skylake generation GPU. Our models
achieve reasonable out-of-sample-error rates of 14\%,
with an average simulation speedup of 327x.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "148",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fezzardi:2017:UEP,
author = "Pietro Fezzardi and Marco Lattuada and Fabrizio
Ferrandi",
title = "Using Efficient Path Profiling to Optimize Memory
Consumption of On-Chip Debugging for High-Level
Synthesis",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "149:1--149:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126564",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "High-Level Synthesis (HLS) for FPGAs is attracting
popularity and is increasingly used to handle complex
systems with multiple integrated components. To
increase performance and efficiency, HLS flows now
adopt several advanced optimization techniques.
Aggressive optimizations and system level integration
can cause the introduction of bugs that are only
observable on-chip. Debugging support for circuits
generated with HLS is receiving considerable
attention. Among the data that can be collected on chip
for debugging, one of the most important is the state
of the Finite State Machines (FSM) controlling the
components of the circuit. However, this usually
requires a large amount of memory to trace the behavior
during the execution. This work proposes an approach
that takes advantage of the HLS information and of the
structure of the FSM to compress control flow traces
and to integrate optimized components for on-chip
debugging. The generated checkers analyze the FSM
execution on-the-fly, automatically notifying when a bug is
detected, localizing it and providing data about its
cause. The traces are compressed using a software
profiling technique, called Efficient Path Profiling
(EPP), adapted for the debugging of hardware
accelerators generated with HLS. With this technique,
the size of the memory used to store control flow
traces can be reduced up to 2 orders of magnitude,
compared to state-of-the-art.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "149",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Piccolboni:2017:CCH,
author = "Luca Piccolboni and Paolo Mantovani and Giuseppe {Di
Guglielmo} and Luca P. Carloni",
title = "{COSMOS}: Coordination of High-Level Synthesis and
Memory Optimization for Hardware Accelerators",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "150:1--150:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126566",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Hardware accelerators are key to the efficiency and
performance of system-on-chip (SoC) architectures. With
high-level synthesis (HLS), designers can easily obtain
several performance-cost trade-off implementations for
each component of a complex hardware accelerator.
However, navigating this design space in search of the
Pareto-optimal implementations at the system level is a
hard optimization task. We present COSMOS, an automatic
methodology for the design-space exploration (DSE) of
complex accelerators, that coordinates both HLS and
memory optimization tools in a compositional way.
First, thanks to the co-design of datapath and memory,
COSMOS produces a large set of Pareto-optimal
implementations for each component of the accelerator.
Then, COSMOS leverages compositional design techniques
to quickly converge to the desired trade-off point
between cost and performance at the system level. When
applied to the system-level design (SLD) of an
accelerator for wide-area motion imagery (WAMI), COSMOS
explores the design space as completely as an
exhaustive search, but it reduces the number of
invocations to the HLS tool by up to $ 14.6 \times $.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "150",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Motamedi:2017:MIR,
author = "Mohammad Motamedi and Daniel Fong and Soheil Ghiasi",
title = "Machine Intelligence on Resource-Constrained {IoT}
Devices: The Case of Thread Granularity Optimization
for {CNN} Inference",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "151:1--151:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126555",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Despite their remarkable performance in various
machine intelligence tasks, the computational intensity
of Convolutional Neural Networks (CNNs) has hindered
their widespread utilization in resource-constrained
embedded and IoT systems. To address this problem, we
present a framework for synthesis of efficient CNN
inference software targeting mobile SoC platforms. We
argue that thread granularity can substantially impact
the performance and energy dissipation of the
synthesized inference software, and demonstrate that
launching the maximum number of logical threads, often
promoted as a guiding principle by GPGPU practitioners,
does not result in an efficient implementation for
mobile SoCs. We hypothesize that the runtime of a CNN
layer on a particular SoC platform can be accurately
estimated as a linear function of its computational
complexity, which may seem counter-intuitive, as modern
mobile SoCs utilize a plethora of heterogeneous
architectural features and dynamic resource management
policies. Consequently, we develop a principled
approach and a data-driven analytical model to optimize
granularity of threads during CNN software synthesis.
Experimental results with several modern CNNs mapped to
a commodity Android smartphone with a Snapdragon SoC
show up to 2.37X speedup in application runtime, and up
to 1.9X improvement in its energy dissipation compared
to existing approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "151",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Vougioukas:2017:NFS,
author = "Ilias Vougioukas and Andreas Sandberg and Stephan
Diestelhorst and Bashir M. Al-Hashimi and Geoff V.
Merrett",
title = "Nucleus: Finding the Sharing Limit of Heterogeneous
Cores",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "152:1--152:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126544",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Heterogeneous multi-processors are designed to bridge
the gap between performance and energy efficiency in
modern embedded systems. This is achieved by pairing
Out-of-Order (OoO) cores, yielding performance through
aggressive speculation and latency masking, with
In-Order (InO) cores, that preserve energy through
simpler design. By leveraging migrations between them,
workloads can therefore select the best setting for any
given energy/delay envelope. However, migrations
introduce execution overheads that can hurt performance
if they happen too frequently. Finding the optimal
migration frequency is critical to maximize energy
savings while maintaining acceptable performance. We
develop a simulation methodology that can (1) isolate
the hardware effects of migrations from the software,
(2) directly compare the performance of different core
types, (3) quantify the performance degradation and (4)
calculate the cost of migrations for each case. To
showcase our methodology we run MiBench, a
microbenchmark suite, and show that migrations can
happen as fast as every 100k instructions with little
performance loss. We also show that, contrary to
numerous recent studies, hypothetical designs do not
need to share all of their internal components to be
able to migrate at that frequency. Instead, we propose
a feasible system that shares level 2 caches and a
translation lookaside buffer that matches performance
and efficiency. Our results show that there are phases
comprising up to 10\% that a migration to the OoO core
leads to performance benefits without any additional
energy cost when running on the InO core, and up to 6\%
of phases where a migration to the InO core can save
energy without affecting performance. When considering
a policy that focuses on improving the energy-delay
product, results show that on average 66\% of the
phases can be migrated to deliver equal or better
system operation without having to aggressively share
the entire memory system or to revert to migration
periods finer than 100k instructions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "152",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Papagiannopoulou:2017:ETE,
author = "Dimitra Papagiannopoulou and Andrea Marongiu and Tali
Moreshet and Maurice Herlihy and R. Iris Bahar",
title = "{Edge-TM}: Exploiting Transactional Memory for Error
Tolerance and Energy Efficiency",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "153:1--153:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126556",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Scaling of semiconductor devices has enabled higher
levels of integration and performance improvements at
the price of making devices more susceptible to the
effects of static and dynamic variability. Adding
safety margins (guardbands) on the operating frequency
or supply voltage prevents timing errors, but has a
negative impact on performance and energy consumption.
We propose Edge-TM, an adaptive hardware/software error
management policy that (i) optimistically scales the
voltage beyond the edge of safe operation for better
energy savings and (ii) works in combination with a
Hardware Transactional Memory (HTM)-based error
recovery mechanism. The policy applies dynamic voltage
scaling (DVS) (while keeping frequency fixed) based on
the feedback provided by HTM, which makes it simple and
generally applicable. Experiments on an embedded
platform show our technique capable of 57\% energy
improvement compared to using voltage guardbands and an
extra 21--24\% improvement over existing
state-of-the-art error tolerance solutions, at a
nominal area and time overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "153",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Vogel:2017:EVM,
author = "Pirmin Vogel and Andreas Kurth and Johannes Weinbuch
and Andrea Marongiu and Luca Benini",
title = "Efficient Virtual Memory Sharing via On-Accelerator
Page Table Walking in Heterogeneous Embedded {SoCs}",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "154:1--154:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126560",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Shared virtual memory is key in heterogeneous systems
on chip (SoCs) that combine a general-purpose host
processor with a many-core accelerator, both for
programmability and performance. In contrast to the
full-blown, hardware-only solutions predominant in
modern high-end systems, lightweight hardware-software
co-designs are better suited in the context of more
power- and area-constrained embedded systems and
provide additional benefits in terms of flexibility and
predictability. As a downside, the latter solutions
require the host to handle in software synchronization
in case of page misses as well as miss handling. This
may incur considerable run-time overheads. In this
work, we present a novel hardware-software virtual
memory management approach for many-core accelerators
in heterogeneous embedded SoCs. It exploits an
accelerator-side helper thread concept that enables the
accelerator to manage its virtual memory hardware
autonomously while operating cache-coherently on the
page tables of the user-space processes of the host.
This greatly reduces overhead with respect to host-side
solutions while retaining flexibility. We have
validated the design with a set of parameterizable
benchmarks and real-world applications covering various
application domains. For purely memory-bound kernels,
the accelerator performance improves by a factor of 3.8
compared with host-based management and lies within
50\% of a lower-bound ideal memory management unit.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "154",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Khouzani:2017:DBS,
author = "Hoda Aghaei Khouzani and Chengmo Yang",
title = "A {DWM}-Based Stack Architecture Implementation for
Energy Harvesting Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "155:1--155:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126543",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Energy harvesting systems tend to use non-volatile
processors to conduct computation under intermittent
power supplies. While previous implementations of
non-volatile processors are based on register
architectures, stack architecture, known for its
simplicity and small footprint, seems to be a better
fit for energy harvesting systems. In this work, Domain
Wall Memory (DWM) is used to implement ZPU, the world's
smallest working CPU. Not only does DWM offer
ultra-high density and SRAM-comparable access latency,
but the sequential access structure of DWM also makes
it well suited for a stack whose accesses display high
temporal locality. As the performance and energy of DWM
are determined by the number of shift operations
performed to access the stack, this paper further
reduces shift operations through novel data placement
and micro-code transformation optimizations. The impact
of compiler optimization techniques on the number of
shift operations is also investigated so as to select
the most effective optimizations for DWM-based stack
machine. Experimental studies confirm the effectiveness
of the proposed DWM-based stack architectures in
improving the performance and energy-efficiency of
energy harvesting systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "155",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Park:2017:FPC,
author = "Jaehyun Park and Hitesh Joshi and Hyung Gyu Lee and
Sayfe Kiaei and Umit Y. Ogras",
title = "Flexible {PV}-cell Modeling for Energy Harvesting in
Wearable {IoT} Applications",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "156:1--156:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126568",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Wearable devices with sensing, processing and
communication capabilities have become feasible with
the advances in internet-of-things (IoT) and low power
design technologies. Energy harvesting is extremely
important for wearable IoT devices due to size and
weight limitations of batteries. One of the most widely
used energy harvesting sources is photovoltaic cell
(PV-cell) owing to its simplicity and high output
power. In particular, flexible PV-cells offer great
potential for wearable applications. This paper models,
for the first time, how bending a PV-cell significantly
impacts the harvested energy. Furthermore, we derive an
analytical model to quantify the harvested energy as a
function of the radius of curvature. We validate the
proposed model empirically using a commercial PV-cell
under a wide range of bending scenarios, light
intensities and elevation angles. Finally, we show that
the proposed model can accelerate maximum power point
tracking algorithms and increase the harvested energy
by up to 25.0\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "156",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Andalam:2017:NEM,
author = "Sidharta Andalam and Nathan Allen and Avinash Malik
and Partha S. Roop and Mark Trew",
title = "A Novel Emulation Model of the Cardiac Conduction
System",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "157:1--157:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126542",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Models of the cardiac conduction system are usually at
two extremes: (1) high fidelity models with excellent
precision but lacking a real-time response for
emulation (hardware in the loop simulation); or (2)
models amenable for emulation, but that do not exhibit
appropriate dynamic response, which is necessary for
arrhythmia susceptibility. We introduce two
abstractions to remedy the situation. The first
abstraction is a new cell model, which is a semi-linear
hybrid automata. The proposed model is as
computationally efficient as current state-of-the-art
cell models amenable for emulation. Yet, unlike these
models, it is also able to capture the dynamic response
of the cardiac cell like the higher-fidelity models.
The second abstraction is the use of smooth-tokens to
develop a new path model, connecting cells, which is
efficient in terms of memory consumption. Moreover, the
memory requirements of the path model can be statically
bounded and are invariant to the emulation step size.
Results show that the proposed semi-linear abstraction
for the cell reduces the execution time by up to 44\%.
Furthermore, the smooth-tokens based path model reduces
the memory consumption by 40 times when compared to
existing path models. This paves the way for the
emulation of complex cardiac conduction systems, using
hardware code-generators.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "157",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rouhani:2017:RAF,
author = "Bita Darvish Rouhani and Azalia Mirhoseini and Farinaz
Koushanfar",
title = "{RISE}: an Automated Framework for Real-Time
Intelligent Video Surveillance on {FPGA}",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "158:1--158:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126549",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This paper proposes RISE, an automated Reconfigurable
framework for real-time background subtraction applied
to Intelligent video SurveillancE. RISE is devised with
a new streaming-based methodology that adaptively
learns/updates a corresponding dictionary matrix from
background pixels as new video frames are captured over
time. This dictionary is used to highlight the
foreground information in each video frame. A key
characteristic of RISE is that it adaptively adjusts
its dictionary for diverse lighting conditions and
varying camera distances by continuously updating the
corresponding dictionary. We evaluate RISE on
natural-scene vehicle images of different backgrounds
and ambient illuminations. To facilitate automation, we
provide an accompanying API that can be used to deploy
RISE on FPGA-based system-on-chip platforms. We
prototype RISE for end-to-end deployment of three
widely-adopted image processing tasks used in
intelligent transportation systems: License Plate
Recognition (LPR), image denoising/reconstruction, and
principal component analysis. Our evaluations
demonstrate up to 87-fold higher throughput per energy
unit compared to the prior-art software solution
executed on ARM Cortex-A15 embedded platform.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "158",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Basu:2017:IUL,
author = "Soumya Basu and Loris Duch and Rub{\'e}n Braojos and
Giovanni Ansaloni and Laura Pozzi and David Atienza",
title = "An Inexact Ultra-low Power Bio-signal Processing
Architecture With Lightweight Error Recovery",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "159:1--159:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126565",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The energy efficiency of digital architectures is
tightly linked to the voltage level (Vdd) at which they
operate. Aggressive voltage scaling is therefore
mandatory when ultra-low power processing is required.
Nonetheless, the lowest admissible Vdd is often bounded
by reliability concerns, especially since static and
dynamic non-idealities are exacerbated in the
near-threshold region, imposing costly guard-bands to
guarantee correctness under worst-case conditions. A
striking alternative, explored in this paper, waives
the requirement for unconditional correctness,
undergoing more relaxed constraints. First, after a
run-time failure, processing correctly resumes at a
later point in time. Second, failures induce a limited
Quality-of-Service (QoS) degradation. We focus our
investigation on the practical scenario of embedded
bio-signal analysis, a domain in which energy
efficiency is key, while applications are inherently
error-tolerant to a certain degree. Targeting a
domain-specific multi-core platform, we present a study
of the impact of inexactness on application-visible
errors. Then, we introduce a novel methodology to
manage them, which requires minimal hardware resources
and a negligible energy overhead. Experimental evidence
shows that, by tolerating 900 errors/hour, the resulting
inexact platform can achieve an efficiency increase of
up to 24\%, with a QoS degradation of less than 3\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "159",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{VanPinxten:2017:OSR,
author = "Joost {Van Pinxten} and Umar Waqas and Marc Geilen and
Twan Basten and Lou Somers",
title = "Online Scheduling of {$2$}-Re-entrant Flexible
Manufacturing Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "160:1--160:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126551",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Online scheduling of operations is essential to
optimize productivity of flexible manufacturing systems
(FMSs) where manufacturing requests arrive on the fly.
An FMS processes products according to a particular
flow through processing stations. This work focusses on
online scheduling of re-entrant FMSs with flows using
processing stations where products pass twice and with
limited buffering between processing stations. This
kind of FMS is modelled as a re-entrant flow shop with
due dates and sequence-dependent set-up times. Such
flow shops can benefit from minimization of the time
penalties incurred from set-up times. On top of an
existing greedy scheduling heuristic we apply a
meta-heuristic that simultaneously explores several
alternatives considering trade-offs between the used
metrics by the scheduling heuristic. We identify
invariants to efficiently remove many infeasible
scheduling options so that the running time of online
implementations is improved. The resulting algorithm is
much faster than the state of the art and produces
schedules with on average 4.6\% shorter makespan.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "160",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Beckert:2017:RTA,
author = "Matthias Beckert and Rolf Ernst",
title = "Response Time Analysis for Sporadic Server Based
Budget Scheduling in Real Time Virtualization
Environments",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "161:1--161:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126559",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Virtualization techniques for embedded real-time
systems typically employ TDMA scheduling to achieve
temporal isolation among different virtualized
applications. Recent work already introduced sporadic
server based solutions relying on budgets instead of a
fixed TDMA schedule. While providing better
average-case response times for IRQs and tasks, a
formal response time analysis for the worst-case is
still missing. In order to confirm the advantage of a
sporadic server based budget scheduling, this paper
provides a worst-case response time analysis. To
improve the sporadic server based budget scheduling
even more, we provide a background scheduling
implementation which will also be covered by the formal
analysis. We show correctness of the analysis approach
and compare it against TDMA based systems. In addition
to that, we provide response time measurements from a
working hypervisor implementation on an ARM based
development board.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "161",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2017:RTD,
author = "Xiaowen Chen and Zhonghai Lu and Sheng Liu and Shuming
Chen",
title = "Round-trip {DRAM} Access Fairness in {$3$D}
{NoC-based} Many-core Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "162:1--162:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126561",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In 3D NoC-based many-core systems, DRAM accesses
behave differently due to their different communication
distances and the latency gap of different DRAM
accesses becomes bigger as the network size increases,
which leads to unfair DRAM access performance among
different nodes. This phenomenon may lead to high
latencies for some DRAM accesses that become the
performance bottleneck of the system. The paper
addresses the DRAM access fairness problem in 3D
NoC-based many-core systems by narrowing the latency
difference of DRAM accesses as well as reducing the
maximum latency. Firstly, the latency of a round-trip
DRAM access is modeled and the factors causing DRAM
access latency difference are discussed in detail.
Secondly, the DRAM access fairness is further
quantitatively analyzed through experiments. Thirdly,
we propose to predict the network latency of round-trip
DRAM accesses and use the predicted round-trip DRAM
access time as the basis to prioritize the DRAM
accesses in DRAM interfaces so that the DRAM accesses
with potential high latencies can be transferred as
early and fast as possible, thus achieving fair DRAM
access. Experiments with synthetic and application
workloads validate that our approach can achieve fair
DRAM access and outperform the traditional
First-Come-First-Serve (FCFS) scheduling policy and the
scheduling policies proposed by reference [7] and [24]
in terms of maximum latency, Latency Standard Deviation
(LSD) and speedup. In the experiments, the maximum
improvement of the maximum latency, LSD, and speedup
are 12.8\%, 6.57\%, and 8.3\% respectively. Besides,
our proposal brings very small extra hardware overhead
($<$ 0.6\%) in comparison to the three counterparts.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "162",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2017:MAA,
author = "Jaewoo Lee and Hoon Sung Chwa and Linh T. X. Phan and
Insik Shin and Insup Lee",
title = "{MC-ADAPT}: Adaptive Task Dropping in
Mixed-Criticality Scheduling",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "163:1--163:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126498",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recent embedded systems are becoming integrated
systems with components of different criticality. To
tackle this, mixed-criticality systems aim to provide
different levels of timing assurance to components of
different criticality levels while achieving efficient
resource utilization. Many approaches have been
proposed to execute more lower-criticality tasks
without affecting the timeliness of higher-criticality
tasks. Those previous approaches however have at least
one of the two limitations; (i) they penalize all
lower-criticality tasks at once upon a certain
situation, or (ii) they make the decision how to
penalize lower-criticality tasks at design time. As a
consequence, they under-utilize resources by imposing
an excessive penalty on low-criticality tasks. Unlike
those existing studies, we present a novel framework,
called MC-ADAPT, that aims to minimally penalize
lower-criticality tasks by fully reflecting the
dynamically changing system behavior into adaptive
decision making. Towards this, we propose a new
scheduling algorithm and develop its runtime
schedulability analysis capable of capturing the
dynamic system state. Our proposed algorithm adaptively
determines which task to drop based on the runtime
analysis. To determine the quality of task dropping
solution, we propose the speedup factor for task
dropping while the conventional use of the speedup
factor only evaluates MC scheduling algorithms in terms
of the worst-case schedulability. We apply the speedup
factor for a newly-defined task dropping problem that
evaluates task dropping solution under different
runtime scheduling scenarios. We derive that MC-ADAPT
has a speedup factor of 1.619 for task drop. This
implies that MC-ADAPT can behave the same as the
optimal scheduling algorithm with optimal task dropping
strategy does under any runtime scenario if the system
is sped up by a factor of 1.619.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "163",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rouxel:2017:TCD,
author = "Benjamin Rouxel and Steven Derrien and Isabelle
Puaut",
title = "Tightening Contention Delays While Scheduling Parallel
Applications on Multi-core Architectures",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "164:1--164:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126496",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Multi-core systems are increasingly interesting
candidates for executing parallel real-time
applications, in avionic, space or automotive
industries, as they provide both computing capabilities
and power efficiency. However, ensuring that timing
constraints are met on such platforms is challenging,
because some hardware resources are shared between
cores. Assuming worst-case contentions when analyzing
the schedulability of applications may result in
systems mistakenly declared unschedulable, although the
worst-case level of contentions can never occur in
practice. In this paper, we present two
contention-aware scheduling strategies that produce a
time-triggered schedule of the application's tasks.
Based on knowledge of the application's structure, our
scheduling strategies precisely estimate the effective
contentions, in order to minimize the overall makespan
of the schedule. An Integer Linear Programming (ILP)
solution of the scheduling problem is presented, as
well as a heuristic solution that generates schedules
very close to ones of the ILP (5\% longer on average),
with a much lower time complexity. Our heuristic
improves by 19\% the overall makespan of the resulting
schedules compared to a worst-case contention
baseline.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "164",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ahmed:2017:DAT,
author = "Rehan Ahmed and Pengcheng Huang and Max Millen and
Lothar Thiele",
title = "On The Design and Application of Thermal Isolation
Servers",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "165:1--165:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126512",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recently, there has been an increasing trend towards
executing real-time applications on multi-core
platforms. However, this complicates the design
problem, as applications running on different cores can
interfere due to shared resources and mediums. In this
paper, we focus on thermal interference, where a given
task ($ \tau_1 $) heats the processor, resulting in
reduced service (due to Dynamic Thermal Management
(DTM)) to another task ($ \tau_2 $). In real-time
domain, where tasks have deadline constraints, thermal
interference is a substantial problem as it directly
impacts the Worst Case Execution Time (WCET) of the
affected application ($ \tau_2 $). The problem
exacerbates as we move to mixed-criticality systems,
where the criticality of $ \tau_2 $ may be greater than
the criticality of $ \tau_1 $, complicating the
certification process. In this paper, we propose a
server based strategy (Thermal Isolation Server (TI
Server)) which can be used to avoid thermal
interference of applications. We also present a
heuristic to design TI Servers to meet the timing
constraints of all tasks and the thermal constraints of
the system. TI Servers are time/space composable, and
can be applied to a variety of task models. We also
evaluate TI Servers on a hardware test-bed for
validation purposes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "165",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Allamigeon:2017:FMC,
author = "Xavier Allamigeon and St{\'e}phane Gaubert and Eric
Goubault and Sylvie Putot and Nikolas Stott",
title = "A Fast Method to Compute Disjunctive Quadratic
Invariants of Numerical Programs",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "166:1--166:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126502",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We introduce a new method to compute non-convex
invariants of numerical programs, which includes the
class of switched affine systems with affine guards. We
obtain disjunctive and non-convex invariants by
associating different partial execution traces with
different ellipsoids. A key ingredient is the solution
of non-monotone fixed points problems over the space of
ellipsoids with a reduction to small size linear matrix
inequalities. This allows us to analyze instances that
are inaccessible in terms of expressivity or scale by
earlier methods based on semi-definite programming.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "166",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schulze:2017:IIM,
author = "Christoph Schulze and Rance Cleaveland",
title = "Improving Invariant Mining via Static Analysis",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "167:1--167:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126504",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This paper proposes the use of static analysis to
improve the generation of invariants from test data
extracted from Simulink models. Previous work has shown
the utility of such automatically generated invariants
as a means for updating and completing system
specifications; they also are useful as a means of
understanding model behavior. This work shows how the
scalability and accuracy of the data mining process can
be dramatically improved by using information from
data/control flow analysis to reduce the search space
of the invariant mining and to eliminate false
positives. Comparative evaluations of the process show
that the improvements significantly reduce execution
time and memory consumption, thereby supporting the
analysis of more complex models, while also improving
the accuracy of the generated invariants.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "167",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chaki:2017:FVT,
author = "Sagar Chaki and Dionisio {De Niz}",
title = "Formal Verification of a Timing Enforcer
Implementation",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "168:1--168:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126517",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A timing enforcer is a scheduler that not only
allocates CPU cycles to threads, but also uses timers
to enforce time budgets. An approach for verifying
safety properties of timing enforcers at the source
code level is presented. We assume that the enforcer is
implemented as a set of ``enforcer'' functions that are
executed atomically on critical system-level events,
such as the arrival and departure of jobs, and
triggering of timers. The key idea is to express the
safety property as an invariant, and prove that it is
inductive across all the enforcer functions. A formal
semantics of timing enforcers is presented, including
the semantics of functions used to read the system
clock and set timers. Using this semantics, the
verification approach is presented, and its soundness
proved. Further, the approach also takes into
consideration the periodicity of tasks. It is validated
by proving the correctness of the enforcement of CPU
cycle budgets for tasks by the Zero-Slack Rate
Monotonic (ZSRM) scheduler, which is implemented in C
as a Linux kernel module. The inductiveness of the
necessary ZSRM invariants is proved by expressing them
as function contracts using the ACSL specification
language, and verifying the contracts using the Frama-C
tool.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "168",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mehrabian:2017:TTL,
author = "Mohammadreza Mehrabian and Mohammad Khayatian and
Aviral Shrivastava and John C. Eidson and Patricia
Derler and Hugo A. Andrade and Ya-Shian Li-Baboud and
Edward Griffor and Marc Weiss and Kevin Stanton",
title = "Timestamp Temporal Logic {(TTL)} for Testing the
Timing of Cyber-Physical Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "169:1--169:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126510",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In order to test the performance and verify the
correctness of Cyber-Physical Systems (CPS), the timing
constraints on the system behavior must be met. Signal
Temporal Logic (STL) can efficiently and succinctly
capture the timing constraints of a given system model.
However, many timing constraints on CPS are more
naturally expressed in terms of events on signals.
While it is possible to specify event-based timing
constraints in STL, such statements can quickly become
long and arcane in even simple systems. Timing
constraints for CPS, which can be large and complex
systems, are often associated with tolerances, the
expression of which can make the timing constraints
even more cumbersome using STL. This paper proposes a
new logic, Timestamp Temporal Logic (TTL), to provide a
definitional extension of STL that more intuitively
expresses the timing constraints of distributed CPS.
TTL also allows for a more natural expression of timing
tolerances. Additionally, this paper outlines a
methodology to automatically generate logic code and
programs to monitor the expressed timing constraints.
Since our TTL monitoring logic evaluates the timing
constraints using only the timestamps of the required
events on the signal, the TTL monitoring logic has
significantly less memory footprint when compared to
traditional STL monitoring logic, which stores the
signal value at the required sampling frequency. The
key contribution of this paper is a scalable approach
for online monitoring of the timing constraints. We
demonstrate the capabilities of TTL and our methodology
for online monitoring of TTL constraints on two case
studies: (1) Synchronization and phase control of two
generators and, (2) Simultaneous image capture using
distributed cameras for 3D image reconstruction.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "169",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Deshmukh:2017:TCP,
author = "Jyotirmoy Deshmukh and Marko Horvat and Xiaoqing Jin
and Rupak Majumdar and Vinayak S. Prabhu",
title = "Testing Cyber-Physical Systems through {Bayesian}
Optimization",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "170:1--170:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126521",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many problems in the design and analysis of
cyber-physical systems (CPS) reduce to the following
optimization problem: given a CPS which transforms
continuous-time input traces in $ \mathbb{R}^m $ to
continuous-time output traces in $ \mathbb{R}^n $ and a cost
function over output traces, find an input trace which
minimizes the cost. Cyber-physical systems are
typically so complex that solving the optimization
problem analytically by examining the system dynamics
is not feasible. We consider a black-box approach,
where the optimization is performed by testing the
input-output behaviour of the CPS. We provide a
unified, tool-supported methodology for CPS testing and
optimization. Our tool is the first CPS testing tool
that supports Bayesian optimization. It is also the
first to employ fully automated dimensionality
reduction techniques. We demonstrate the potential of
our tool by running experiments on multiple industrial
case studies. We compare the effectiveness of Bayesian
optimization to state-of-the-art testing techniques
based on CMA-ES and Simulated Annealing.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "170",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sun:2017:WHS,
author = "Youcheng Sun and Marco {Di Natale}",
title = "Weakly Hard Schedulability Analysis for Fixed Priority
Scheduling of Periodic Real-Time Tasks",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "171:1--171:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126497",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The hard deadline model is very popular in real-time
research, but is representative or applicable to a
small number of systems. Many applications, including
control systems, are capable of tolerating occasional
deadline misses, but are seriously compromised by a
repeating pattern of late terminations. The weakly hard
real-time model tries to capture these requirements by
analyzing the conditions that guarantee that a maximum
number of deadlines can be possibly missed in any set
of consecutive activations. We provide a new weakly
hard schedulability analysis method that applies to
constrained-deadline periodic real-time systems
scheduled with fixed priority and without knowledge of
the task activation offsets. The analysis is based on a
Mixed Integer Linear Programming (MILP) problem
formulation; it is very general and can be adapted to
include the consideration of resource sharing and
activation jitter. A set of experiments conducted on an
automotive engine control application and randomly
generated tasksets show the applicability and accuracy
of the proposed technique.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "171",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schlatow:2017:RTA,
author = "Johannes Schlatow and Rolf Ernst",
title = "Response-Time Analysis for Task Chains with Complex
Precedence and Blocking Relations",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "172:1--172:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126505",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "For the development of complex software systems, we
often resort to component-based approaches that
separate the different concerns, enhance verifiability
and reusability, and for which microkernel-based
implementations are a good fit to enforce these
concepts. Composing such a system of several
interacting software components will, however, lead to
complex precedence and blocking relations, which must
be taken into account when performing latency analysis.
When modelling these systems by classical task graphs,
some of these effects are obfuscated and tend to render
such an analysis either overly pessimistic or even
optimistic. We therefore firstly present a novel task
(meta-)model that is more expressive and accurate
w.r.t. these (functional) precedence and mutual
blocking relations. Secondly, we apply the busy-window
approach and formulate a modular response-time analysis
on task-chain level suitable but not restricted to
static-priority scheduled systems. We show that the
conjunction of both concepts allows the calculation of
reasonably tight latency bounds for scenarios not
adequately covered by related work.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "172",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kurtin:2017:ART,
author = "Philip S. Kurtin and Marco J. G. Bekooij",
title = "An Abstraction-Refinement Theory for the Analysis and
Design of Real-Time Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "173:1--173:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126507",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Component-based and model-based reasonings are key
concepts to address the increasing complexity of
real-time systems. Bounding abstraction theories allow
to create efficiently analyzable models that can be
used to give temporal or functional guarantees on
non-deterministic and non-monotone implementations.
Likewise, bounding refinement theories allow to create
implementations that adhere to temporal or functional
properties of specification models. For systems in
which jitter plays a major role, both best-case and
worst-case bounding models are needed. In this paper we
present a bounding abstraction-refinement theory for
real-time systems. Compared to the state-of-the-art
TETB refinement theory, our theory is less restrictive
with respect to the automatic lifting of properties
from component to graph level and does not only support
temporal worst-case refinement, but evenhandedly
temporal and functional, best-case and worst-case
abstraction and refinement.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "173",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Azimi:2017:HHF,
author = "Iman Azimi and Arman Anzanpour and Amir M. Rahmani and
Tapio Pahikkala and Marco Levorato and Pasi Liljeberg
and Nikil Dutt",
title = "{HiCH}: Hierarchical Fog-Assisted Computing
Architecture for Healthcare {IoT}",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "174:1--174:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126501",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The Internet of Things (IoT) paradigm holds
significant promises for remote health monitoring
systems. Due to their life- or mission-critical nature,
these systems need to provide a high level of
availability and accuracy. On the one hand, centralized
cloud-based IoT systems lack reliability, punctuality
and availability (e.g., in case of slow or unreliable
Internet connection), and on the other hand, fully
outsourcing data analytics to the edge of the network
can result in diminished level of accuracy and
adaptability due to the limited computational capacity
in edge nodes. In this paper, we tackle these issues by
proposing a hierarchical computing architecture, HiCH,
for IoT-based health monitoring systems. The core
components of the proposed system are (1) a novel
computing architecture suitable for hierarchical
partitioning and execution of machine learning based
data analytics, (2) a closed-loop management technique
capable of autonomous system adjustments with respect
to patient's condition. HiCH benefits from the features
offered by both fog and cloud computing and introduces
a tailored management methodology for healthcare IoT
systems. We demonstrate the efficacy of HiCH via a
comprehensive performance assessment and evaluation on
a continuous remote health monitoring case study
focusing on arrhythmia detection for patients suffering
from CardioVascular Diseases (CVDs).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "174",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhao:2017:ORT,
author = "Yecheng Zhao and Chao Peng and Haibo Zeng and Zonghua
Gu",
title = "Optimization of Real-Time Software Implementing
Multi-Rate Synchronous Finite State Machines",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "175:1--175:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126515",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Model-based design using Synchronous Reactive (SR)
models is becoming widespread for control software
development in industry. However, software synthesis is
challenging for multi-rate SR models consisting of
blocks modeled with finite state machines, due to the
complexity of validating the system's real-time
schedulability. The existing approach uses the
simplified periodic task model to allow an efficient
schedulability analysis, which leads to pessimistic and
suboptimal solutions. Instead, in this paper, we adopt
a more accurate but more complex schedulability
analysis. We develop several optimization techniques to
improve the algorithm's efficiency. Experimental
results on synthetic systems and an industrial case
study show that the proposed optimization framework
preserves the solution optimality but is much faster
(e.g., $ 1000 \times $ for systems with 15 blocks) than
the branch-and-bound algorithm, and it generates better
control software than the existing approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "175",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bourke:2017:SLS,
author = "Timothy Bourke and Francois Carcenac and Jean-Louis
Cola{\c{c}}o and Bruno Pagano and C{\'e}dric Pasteur
and Marc Pouzet",
title = "A Synchronous Look at the {Simulink} Standard
Library",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "176:1--176:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126516",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/matlab.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Hybrid systems modelers like Simulink come with a rich
collection of discrete-time and continuous-time blocks.
Most blocks are not defined in terms of more elementary
ones---and some cannot be---but are instead written in
imperative code and explained informally in a reference
manual. This raises the question of defining a minimal
set of orthogonal programming constructs such that most
blocks can be programmed directly and thereby given a
specification that is mathematically precise, and whose
compiled version performs comparably to handwritten
code. In this paper, we show that a fairly large set of
blocks of a standard library like the one provided by
Simulink can be programmed in a precise, purely
functional language using stream equations,
hierarchical automata, Ordinary Differential Equations
(ODEs), and deterministic synchronous parallel
composition. Some blocks cannot be expressed in our
setting as they mix discrete-time and continuous-time
signals in unprincipled ways that are statically
forbidden by the type checker. The experiment is
conducted in Z{\'e}lus, a synchronous language that
conservatively extends Lustre with ODEs to program
systems that mix discrete-time and continuous-time
signals.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "176",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2017:TAS,
author = "Jiajie Wang and Michael Mendler and Partha Roop and
Bruno Bodin",
title = "Timing Analysis of Synchronous Programs using {WCRT}
Algebra: Scalability through Abstraction",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "177:1--177:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126520",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Synchronous languages are ideal for designing
safety-critical systems. Static Worst-Case Reaction
Time (WCRT) analysis is an essential component in the
design flow that ensures the real-time requirements are
met. There are a few approaches for WCRT analysis, and
the most versatile of all is explicit path enumeration.
However, as synchronous programs are highly concurrent,
techniques based on this approach, such as model
checking, suffer from state explosion as the number of
threads increases. One observation on this problem is
that these existing techniques analyse the program by
enumerating a functionally equivalent automaton while
WCRT is a non-functional property. This mismatch
potentially causes algorithm-induced state explosion.
In this paper, we propose a WCRT analysis technique
based on the notion of timing equivalence, expressed
using WCRT algebra. WCRT algebra can effectively
capture the timing behaviour of a synchronous program
by converting its intermediate representation Timed
Concurrent Control Flow Graph (TCCFG) into a Tick Cost
Automaton (TCA), a minimal automaton that is timing
equivalent to the original program. Then the WCRT is
computed over the TCA. We have implemented our approach
and benchmarked it against state-of-the-art WCRT
analysis techniques. The results show that the WCRT
algebra is 3.5 times faster on average than the fastest
published technique.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "177",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pinisetty:2017:REC,
author = "Srinivas Pinisetty and Partha S. Roop and Steven Smyth
and Nathan Allen and Stavros Tripakis and Reinhard von
Hanxleden",
title = "Runtime Enforcement of Cyber-Physical Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "178:1--178:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126500",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Many implantable medical devices, such as pacemakers,
have been recalled due to failure of their embedded
software. This motivates rethinking their design and
certification processes. We propose, for the first
time, an additional layer of safety by formalising the
problem of run-time enforcement of implantable
pacemakers. While recent work has formalised run-time
enforcement of reactive systems, the proposed framework
generalises existing work along the following
directions: (1) we develop bi-directional enforcement,
where the enforced policies depend not only on the
status of the pacemaker (the controller) but also of
the heart (the plant), thus formalising the run-time
enforcement problem for cyber-physical systems, (2) we
express policies using a variant of discrete timed
automata (DTA), which can cover all regular properties
unlike earlier frameworks limited to safety properties,
(3) we are able to ensure the timing safety of
implantable devices through the proposed enforcement,
and (4) we show that the DTA-based approach is
efficient relative to its dense time variant while
ensuring that the discretisation error is relatively
small and bounded. The developed approach is validated
through a prototype system implemented using the open
source KIELER framework. The experiments show that the
framework incurs minimal runtime overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "178",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2017:BEB,
author = "Qingrui Liu and Xiaolong Wu and Larry Kittinger and
Markus Levy and Changhee Jung",
title = "{BenchPrime}: Effective Building of a Hybrid Benchmark
Suite",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "179:1--179:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126499",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This paper presents BenchPrime, an automated benchmark
analysis toolset that is systematic and extensible to
analyze the similarity and diversity of benchmark
suites. BenchPrime takes multiple benchmark suites and
their evaluation metrics as inputs and generates a
hybrid benchmark suite comprising only essential
applications. Unlike prior work, BenchPrime uses linear
discriminant analysis rather than principal component
analysis, as well as selects the best clustering
algorithm and the optimized number of clusters in an
automated and metric-tailored way, thereby achieving
high accuracy. In addition, BenchPrime ranks the
benchmark suites in terms of their application set
diversity and estimates how unique each benchmark suite
is compared to other suites. As a case study, this work
for the first time compares the DenBench with the
MediaBench and MiBench using four different metrics to
provide a multi-dimensional understanding of the
benchmark suites. For each metric, BenchPrime measures
to what degree DenBench applications are irreplaceable
with those in MediaBench and MiBench. This provides
means for identifying an essential subset from the
three benchmark suites without compromising the
application balance of the full set. The experimental
results show that the necessity of including DenBench
applications varies across the target metrics and that
significant redundancy exists among the three benchmark
suites.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "179",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schuster:2017:DSE,
author = "Simon Schuster and Peter Ulbrich and Isabella
Stilkerich and Christian Dietrich and Wolfgang
Schr{\"o}der-Preikschat",
title = "Demystifying Soft-Error Mitigation by Control-Flow
Checking --- A New Perspective on its Effectiveness",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "180:1--180:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126503",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Soft errors are a challenging and urging problem in
the domain of safety-critical embedded systems. For
decades, checking schemes have been investigated and
improved to mitigate soft-error effects for the class
of control-flow faults, with current industrial
standards strongly recommending their use. However,
reality looks different: Taking a systems perspective,
we implemented four representative Control-Flow
Checking (CFC) schemes and put them through their paces
in 396 fault-injection campaigns. In contrast to
previous work, which typically relied on
probability-based vulnerability metrics, we accounted
for the influence of memory and time overheads on the
fault-space dimensions and applied those in full-scan
fault injections. This change in procedure alone
severely degraded the perceived effectiveness of CFC.
In addition, we expanded the perspective to data-flow
faults and their influence on the overall
susceptibility, an aspect that so far has been largely
ignored. Our results suggest that, without accompanying
measures, any improvement regarding control-flow faults
is dominated by the increase in data faults caused by
the increased attack surface in terms of memory and
runtime overhead. Moreover, CFC performance less
depended on the detection capabilities than on general
aspects of the concrete binary compilation and
execution. In conclusion, incorporating CFC is not as
straightforward as often assumed and the vulnerability
of systems with hardened control-flow may in many cases
even be increased by the schemes themselves.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "180",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shresthamali:2017:APM,
author = "Shaswot Shresthamali and Masaaki Kondo and Hiroshi
Nakamura",
title = "Adaptive Power Management in Solar Energy Harvesting
Sensor Node Using Reinforcement Learning",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "181:1--181:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126495",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this paper, we present an adaptive power manager
for solar energy harvesting sensor nodes. We use a
simplified model consisting of a solar panel, an ideal
battery and a general sensor node with variable duty
cycle. Our power manager uses Reinforcement Learning
(RL), specifically SARSA($ \lambda $) learning, to
train itself from historical data. Once trained, we
show that our power manager is capable of adapting to
changes in weather, climate, device parameters and
battery degradation while ensuring near-optimal
performance without depleting or overcharging its
battery. Our approach uses a simple but novel general
reward function and leverages the use of weather
forecast data to enhance performance. We show that our
method achieves near perfect energy neutral operation
(ENO) with less than 6\% root mean square deviation
from ENO as compared to more than 23\% deviation that
occurs when using other approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "181",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2017:AAS,
author = "Sang-Hoon Kim and Jinkyu Jeong and Jin-Soo Kim",
title = "Application-Aware Swapping for Mobile Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "182:1--182:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126509",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "There has been a constant demand for memory in modern
mobile systems to provide users with better experience.
Swapping is one of the cost-effective software
solutions to provide extra usable memory by reclaiming
inactive pages and improving memory utilization.
However, swapping has not been actively adopted to
mobile systems since it incurs a significant amount of
I/O, which in fact impairs system performance as well
as user experience. In this paper, we propose a novel
scheme to properly harness the swapping to mobile
systems. We identify that a vast amount of I/O for
swapping comes from the conflict of the traditional
page-level approach of the swapping and the
process-level memory management scheme tailored to
mobile systems. Moreover, we find out that the current
victim page selection policy is not effective due to
the process-level policy. To address these problems, we
revise the victim selection policy to resolve the
conflict and to selectively perform swapping according
to the efficacy of swapping. Evaluation using a running
prototype with realistic workloads indicates that the
proposed scheme effectively reduces the paging traffic,
thereby improving user experience as well as energy
consumption.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "182",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ji:2017:LDC,
author = "Cheng Ji and Li-Pin Chang and Liang Shi and Congming
Gao and Chao Wu and Yuangang Wang and Chun Jason Xue",
title = "Lightweight Data Compression for Mobile Flash
Storage",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "183:1--183:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126511",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Data compression is beneficial to flash storage
lifespan. However, because the design of mobile flash
storage is highly cost-sensitive, hardware compression
becomes a less attractive option. This study
investigates the feasibility of data compression on
mobile flash storage. It first characterizes data
compressibility based on mobile apps, and the analysis
shows that write traffic bound for mobile storage
volumes is highly compressible. Based on this finding,
a lightweight approach is introduced for firmware-based
data compression in mobile flash storage. The
controller and flash module work in a pipelined fashion
to hide the data compression overhead. Together with
this pipelined design, the proposed approach
selectively compresses incoming data of high
compressibility, while leaving data of low
compressibility to a compression-aware garbage
collector. Experimental results show that our approach
greatly reduced the frequency of block erase by 50.5\%
compared to uncompressed flash storage. Compared to
unconditional data compression, our approach improved
the write latency by 10.4\% at a marginal cost of 4\%
more block erase operations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "183",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Medhat:2017:MPE,
author = "Ramy Medhat and Michael O. Lam and Barry L. Rountree
and Borzoo Bonakdarpour and Sebastian Fischmeister",
title = "Managing the Performance\slash Error Tradeoff of
Floating-point Intensive Applications",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "184:1--184:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126519",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Modern embedded systems are becoming more reliant on
real-valued arithmetic as they employ mathematically
complex vision algorithms and sensor signal processing.
Double-precision floating point is the most commonly
used precision in computer vision algorithm
implementations. A single-precision floating point can
provide a performance boost due to less memory
transfers, less cache occupancy, and relatively faster
mathematical operations on some architectures. However,
adopting it can result in loss of accuracy. Identifying
which parts of the program can run in single-precision
floating point with low impact on error is a manual and
tedious process. In this paper, we propose an automatic
approach to identify parts of the program that have a
low impact on error using shadow-value analysis. Our
approach provides the user with a performance/error
tradeoff, using which the user can decide how much
accuracy can be sacrificed in return for performance
improvement. We illustrate the impact of the approach
using a well-known implementation of AprilTag detection
used in robotics vision. We demonstrate that an average
$ 1.3 \times $ speedup can be achieved with no impact on tag
detection, and a $ 1.7 \times $ speedup with only 4\% false
negatives.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "184",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sogokon:2017:OMP,
author = "Andrew Sogokon and Khalil Ghorbal and Taylor T.
Johnson",
title = "Operational Models for Piecewise-Smooth Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "185:1--185:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126506",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article we study ways of constructing
meaningful operational models of piecewise-smooth
systems (PWS). The systems we consider are described by
polynomial vector fields defined on non-overlapping
semi-algebraic sets, which form a partition of the
state space. Our approach is to give meaning to motion
in systems of this type by automatically synthesizing
operational models in the form of hybrid automata (HA).
Despite appearances, it is in practice often difficult
to arrive at satisfactory HA models of PWS. The
different ways of building operational models that we
explore in our approach can be thought of as defining
different semantics for the underlying PWS. These
differences have a number of interesting nuances
related to phenomena such as chattering,
non-determinism, so-called mythical modes and sliding
behaviour.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "185",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huang:2017:PSV,
author = "Chao Huang and Xin Chen and Wang Lin and Zhengfeng
Yang and Xuandong Li",
title = "Probabilistic Safety Verification of Stochastic Hybrid
Systems Using Barrier Certificates",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "186:1--186:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126508",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The problem of probabilistic safety verification of
stochastic hybrid systems is to check whether the
probability that a given system will reach an unsafe
region from certain initial states can be bounded by
some given probability threshold. The paper considers
stochastic hybrid systems where the behavior is
governed by polynomial equalities and inequalities, as
for usual hybrid systems, but the initial states follow
some stochastic distributions. It proposes a new
barrier certificate based method for probabilistic
safety verification which guarantees the absolute
safety in an infinite time horizon that is beyond the
reach of existing techniques using either statistical
model checking or probabilistic reachable set
computation. It also gives a novel computational
approach, by building and solving a constrained
optimization problem coming from verification
conditions of barrier certificates, to compute the
lower bound on safety probabilities which can be
compared with the given threshold. Experimental
evidence is provided demonstrating the applicability of
our approach on several benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "186",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2017:CRA,
author = "Xin Chen and Sergio Mover and Sriram
Sankaranarayanan",
title = "Compositional Relational Abstraction for Nonlinear
Hybrid Systems",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "187:1--187:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126522",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We propose techniques to construct abstractions for
nonlinear dynamics in terms of relations expressed in
linear arithmetic. Such relations are useful for
translating the closed loop verification problem of
control software with continuous-time, nonlinear plant
models into discrete and linear models that can be
handled by efficient software verification approaches
for discrete-time systems. We construct relations using
Taylor model based flowpipe construction and the
systematic composition of relational abstractions for
smaller components. We focus on developing efficient
schemes for the special case of composing abstractions
for linear and nonlinear components. We implement our
ideas using a relational abstraction system, using the
resulting abstraction inside the verification tool
NuXMV, which implements numerous SAT/SMT solver-based
verification techniques for discrete systems. Finally,
we evaluate the application of relational abstractions
for verifying properties of time triggered controllers,
comparing with the Flow* tool. We conclude that
relational abstractions are a promising approach
towards nonlinear hybrid system verification, capable
of proving properties that are beyond the reach of
tools such as Flow*. At the same time, we highlight the
need for improvements to existing linear arithmetic
SAT/SMT solvers to better support reasoning with large
relational abstractions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "187",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lesi:2017:SAS,
author = "Vuk Lesi and Ilija Jovanov and Miroslav Pajic",
title = "Security-Aware Scheduling of Embedded Control Tasks",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "188:1--188:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126518",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this work, we focus on securing cyber-physical
systems (CPS) in the presence of network-based attacks,
such as Man-in-the-Middle (MitM) attacks, where a
stealthy attacker is able to compromise communication
between system sensors and controllers. Standard
methods for this type of attacks rely on the use of
cryptographic mechanisms, such as Message
Authentication Codes (MACs) to ensure data integrity.
However, this approach incurs significant computation
overhead, limiting its use in resource constrained
systems. Consequently, we consider the problem of
scheduling multiple control tasks on a shared processor
while providing a suitable level of security
guarantees. Specifically, by security guarantees we
refer to control performance, i.e., Quality-of-Control
(QoC), in the presence of attacks. We start by mapping
requirements for QoC under attack into constraints for
security-aware control tasks that, besides standard
control operations, intermittently perform data
authentication. This allows for the analysis of the
impact that security-related computation overhead has
on both schedulability of control tasks and QoC.
Building on this analysis, we introduce a mixed-integer
linear programming-based technique to obtain a
schedulable task set with predefined QoC requirements.
Also, to facilitate optimal resource allocation, we
provide a method to analyze interplay between available
computational resources and the overall QoC under
attack, and show how to obtain a schedulable task set
that maximizes the overall QoC guarantees. Finally, we
prove usability of our approach on a case study with
multiple automotive control components.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "188",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ghosh:2017:SMP,
author = "Sumana Ghosh and Souradeep Dutta and Soumyajit Dey and
Pallab Dasgupta",
title = "A Structured Methodology for Pattern based Adaptive
Scheduling in Embedded Control",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "189:1--189:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126514",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Software implementation of multiple embedded control
loops often share compute resources. The control
performance of such implementations has been shown to
improve if the sharing of bandwidth between control
loops can be dynamically regulated in response to input
disturbances. In the absence of a structured
methodology for planning such measures, the scheduler
may spend too much time in deciding the optimal
scheduling pattern. Our work leverages well known
results in the domain of network control systems and
applies them in the context of bandwidth sharing among
controllers. We provide techniques that may be used a
priori for computing co-schedulable execution patterns
for a given set of control loops such that stability is
guaranteed under all possible disturbance scenarios.
Additionally, the design of the control loops optimizes
the average case control performance by adaptive
sharing of bandwidth under time varying input
disturbances.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "189",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gavran:2017:AMR,
author = "Ivan Gavran and Rupak Majumdar and Indranil Saha",
title = "{Antlab}: a Multi-Robot Task Server",
journal = j-TECS,
volume = "16",
number = "5s",
pages = "190:1--190:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126513",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:33 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We present Antlab, an end-to-end system that takes
streams of user task requests and executes them using
collections of robots. In Antlab, each request is
specified declaratively in linear temporal logic
extended with quantifiers over robots. The user does
not program robots individually, nor know how many
robots are available at any time or the precise state
of the robots. The Antlab runtime system manages the
set of robots, schedules robots to perform tasks,
automatically synthesizes robot motion plans from the
task specification, and manages the co-ordinated
execution of the plan. We provide a constraint-based
formulation for simultaneous task assignment and plan
generation for multiple robots working together to
satisfy a task specification. In order to scalably
handle multiple concurrent tasks, we take a separation
of concerns view to plan generation. First, we solve
each planning problem in isolation, with an ``ideal
world'' hypothesis that says there are no unspecified
dynamic obstacles or adversarial environment actions.
Second, to deal with imprecisions of the real world, we
implement the plans in receding horizon fashion on top
of a standard robot navigation stack. The motion
planner dynamically detects environment actions or
dynamic obstacles from the environment or from other
robots and locally corrects the ideal planned path. It
triggers a re-planning step dynamically if the current
path deviates from the planned path or if planner
assumptions are violated. We have implemented Antlab as
a C++ and Python library on top of robots running on
ROS, using SMT-based and AI planning-based
implementations for task and path planning. We
evaluated Antlab both in simulation as well as on a set
of TurtleBot robots. We demonstrate that it can provide
a scalable and robust infrastructure for declarative
multi-robot programming.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "190",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2018:ETS,
author = "Sandeep K. Shukla",
title = "Editorial: Trust and Security Must Become a Primary
Design Concern in Embedded Computing",
journal = j-TECS,
volume = "17",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3173385",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2018:GEA,
author = "Jiming Chen and Yu (Jason) Gu and Gil Zussman",
title = "Guest Editorial for {ACM TECS}: Special Issue on
Autonomous Battery-Free Sensing and Communication",
journal = j-TECS,
volume = "17",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3127494",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2018:HEW,
author = "Qi Chen and Ye Liu and Guangchi Liu and Qing Yang and
Xianming Shi and Hongwei Gao and Lu Su and Quanlong
Li",
title = "Harvest Energy from the Water: a Self-Sustained
Wireless Water Quality Sensing System",
journal = j-TECS,
volume = "17",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047646",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Water quality data is incredibly important and
valuable, but its acquisition is not always trivial. A
promising solution is to distribute a wireless sensor
network in water to measure and collect the data;
however, a drawback exists in that the batteries of the
system must be replaced or recharged after being
exhausted. To mitigate this issue, we designed a
self-sustained water quality sensing system that is
powered by renewable bioenergy generated from microbial
fuel cells (MFCs). MFCs collect the energy released
from native magnesium oxidizing microorganisms (MOMs)
that are abundant in natural waters. The proposed
energy-harvesting technology is environmentally
friendly and can provide maintenance-free power to
sensors for several years. Despite these benefits, an
MFC can only provide microwatt-level power that is not
sufficient to continuously power a sensor. To address
this issue, we designed a power management module to
accumulate energy when the input voltage is as low as
0.33 V. We also proposed a radio-frequency (RF)
activation technique to remotely activate sensors that
otherwise are switched off in default. With this
innovative technique, a sensor's energy consumption in
sleep mode can be completely avoided. Additionally,
this design can enable on-demand data acquisitions from
sensors. We implement the proposed system and evaluate
its performance in a stream. In 3-month field
experiments, we find the system is able to reliably
collect water quality data and is robust to environment
changes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gomez:2018:ELT,
author = "Andres Gomez and Lukas Sigrist and Thomas Schalch and
Luca Benini and Lothar Thiele",
title = "Efficient, Long-Term Logging of Rich Data Sensors
Using Transient Sensor Nodes",
journal = j-TECS,
volume = "17",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3047499",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "While energy harvesting is generally seen to be the
key to power cyber-physical systems in a low-cost,
long-term, efficient manner, it has generally required
large energy storage devices to mitigate the effects of
the source's variability. The emerging class of
transiently powered systems embrace this variability by
performing computation in proportion to the energy
harvested, thereby minimizing the obtrusive and
expensive storage element. By using an efficient Energy
Management Unit (EMU), small bursts of energy can be
buffered in an optimally sized capacitor and used to
supply generic loads, even when the average harvested
power is only a fraction of that required for sustained
system operation. Dynamic Energy Burst Scaling (DEBS)
can be used by the load to dynamically configure the
EMU to supply small bursts of energy at its optimal
power point, independent from the harvester's operating
point. Parameters like the maximum burst size, the
solar panel's area, as well as the use of
energy-efficient Non-Volatile Memory Hierarchy (NVMH)
can have a significant impact on the transient system's
characteristics such as the wake-up time and the amount
of work that can be done per unit of energy.
Experimental data from a solar-powered, long-term
autonomous image acquisition application show that,
regardless of its configuration, the EMU can supply
energy bursts to a 43.4 mW load with efficiencies of up
to 79.7\% and can work with input power levels as low
as 140 $ \mu $W. When the EMU is configured to use DEBS
and NVMH, the total energy cost of acquiring,
processing and storing an image can be reduced by
77.8\%, at the price of increasing the energy buffer
size by 65\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2018:TAB,
author = "Zejue Wang and Hongjia Li and Dan Hu and Song Ci",
title = "Transmission Adaptation for Battery-Free Relaying",
journal = j-TECS,
volume = "17",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3055513",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Energy harvesting (EH)-enabled relaying has attracted
considerable attention as an effective way to prolong
the operation time of energy-constrained networks and
extend coverage beside desired survivability and rate
of transmission. In related literature, the
Harvest-Store-Use (HSU) model is usually utilized to
describe the energy flow behavior of the EH system.
However, the half-duplex (HD) constraint of HSU that
harvested energy can only be used after being
temporally stored in energy buffer may reduce effective
transmission time. Thus, we first construct the
full-duplex (FD) energy flow behavior model of the EH
system where the harvested energy can be tuned to power
load and being stored simultaneously. The FD model is
then proved to be equivalent with the HSU model when
time interval is small enough. Considering some key
physical variabilities, for example, the wireless
channel and the amount of harvested energy, the
transmission adaptation problem for multiple relays
embedded with FD EH systems is formulated with the
objective to improve the utilization of the harvested
energy. We tackle the problem by using a centralized
optimization algorithm by jointly tuning the factors,
including power control for source and relay nodes,
relay selection and dynamic switching among four relay
transmission modes, namely HD amplify-and-forward (AF),
HD decode-and-forward (DF), FD AF, and FD DF. The
centralized optimization algorithm is proposed on the
basis of dual decomposition and serves as a benchmark.
To enable relays to individually make their own
decisions, a distributed algorithm with relatively
higher complexity is given by using consensus
optimization in conjunction with the alternating
direction method of multipliers, and a sub-optimal
algorithm with low complexity is provided. The proposed
algorithms are shown to have good performance via
simulations for a range of different EH rates and
prediction errors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2018:STW,
author = "Zhongqin Wang and Fu Xiao and Ning Ye and Ruchuan Wang
and Panlong Yang",
title = "A See-through-Wall System for Device-Free Human Motion
Sensing Based on Battery-Free {RFID}",
journal = j-TECS,
volume = "17",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3055515",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A see-through-wall system can be used in life
detection, military fields, elderly people
surveillance, and gaming. The existing systems are
mainly based on military devices, customized signals or
pre-deployed sensors inside the room, which are very
expensive and inaccessible for general use. Recently, a
low-cost RFID technology has gained a lot of attention
in this field. Since phase estimates of a battery-free
RFID tag collected by a commercial off-the-shelf (COTS)
RFID reader are sensitive to external interference, the
RFID tag could be regarded as a battery-free sensor
that detects reflections off targeted objects. The
existing RFID-based system, however, needs to first
learn the environment of the empty room beforehand to
separate reflections off the tracked target. Besides,
it can only track low-speed metal objects with
high-positioning accuracy. Since the human body with
its complex surface has a weaker ability to reflect
radio frequency (RF) signals than metal objects, a
battery-free RFID tag can capture only a subset of the
reflections off the human body. To address these
challenges, an RFID-based human motion sensing
technology, called RF-HMS, is presented to track
device-free human motion through walls. At first, we
construct transfer functions of multipath channel based
on phase and RSSI measurements to eliminate device
noise and reflections off static objects like walls and
furniture without learning the environment of the empty
room before. Then a tag planar array is grouped by many
battery-free RFID tags to improve the sensing
performance. RF-HMS combines reflections from each RFID
tag into a reinforced result. On this basis, we extract
phase shifts to detect the absence or presence of any
moving persons and further derive the reflections off a
single moving person to identify his/her forward or
backward motion direction. The results show that RF-HMS
can effectively detect the absence or presence of
moving persons with 100\% accuracy and keep a high
accuracy of more than 90\% to track human motion
directions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lin:2018:OOP,
author = "Chi Lin and Yanhong Zhou and Houbing Song and Chang Wu
Yu and Guowei Wu",
title = "{OPPC}: an Optimal Path Planning Charging Scheme Based
on Schedulability Evaluation for {WRSNs}",
journal = j-TECS,
volume = "17",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126684",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The lack of schedulability evaluation of previous
charging schemes in wireless rechargeable sensor
networks (WRSNs) degrades the charging efficiency,
leading to node exhaustion. We propose an Optimal Path
Planning Charging scheme, namely OPPC, for the
on-demand charging architecture. OPPC evaluates the
schedulability of a charging mission, which makes
charging scheduling predictable. It provides an optimal
charging path which maximizes charging efficiency. When
confronted with a non-schedulable charging mission, a
node discarding algorithm is developed to enable the
schedulability. Experimental simulations demonstrate
that OPPC can achieve better performance in successful
charging rate as well as charging efficiency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hu:2018:JOS,
author = "Hang Hu and Hang Zhang and Jianxin Guo and Feng Wang",
title = "Joint Optimization of Sensing and Power Allocation in
Energy-Harvesting Cognitive Radio Networks",
journal = j-TECS,
volume = "17",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070709",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The energy-harvesting cognitive radio (CR) network is
proposed to improve the spectrum efficiency and energy
efficiency. We focus on the optimization of sensing
time and power allocation to maximize the throughput of
the energy-harvesting CR network subject to the energy
causality constraint and collision constraint. Based on
the classification of operating regions, the
optimization problem is divided into two sub-problems.
Then, the efficient iterative Algorithm 1 and Algorithm
2 are proposed to solve sub-problem (A) and sub-problem
(B), respectively. Numerical results show that a
significant improvement in the throughput is achieved
via joint optimization of sensing time and power
allocation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2018:RRA,
author = "Die Wu and Li Lu and Muhammad Jawad Hussain and
Songfan Li and Mo Li and Fengli Zhang",
title = "{$ R^3 $}: Reliable Over-the-Air Reprogramming on
Computational {RFIDs}",
journal = j-TECS,
volume = "17",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070720",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Computational Radio Frequency Identification (CRFID)
tags operate solely on harvested energy and have
emerged as viable platforms for a variety of ubiquitous
sensing and computation applications. Due to their
battery-less nature, these tags can be permanently
deployed in hard-to-reach places where the possibility
of tag access is eliminated. In such scenarios,
maintaining and upgrading the tag's firmware becomes
infeasible because programming tools, including wired
interface and PC-based software, are required to erase,
modify, or reprogram the microcontroller unit's memory.
Such limitations necessitate the demand for an
over-the-air (OTA) scheme, which can wirelessly
reprogram or upgrade the firmware in CRFID tags. In
this article, we present $ R^3 $ --- a reliable OTA
reprogramming scheme that is compliant with EPC
protocol and requires no hardware upgrade to RFID
reader or CRFID tag. We demonstrate our scheme on three
platforms, which include both software-defined as well
as chip-based CRFID tags, that is, WISP5.1 and
Optimized WISP (Opt-WISP), and Spider tag,
respectively. The selection also includes both the
FLASH- and FRAM-based microcontrollers. We extensively
evaluate our scheme in terms of several metrics,
including overall system delay, time and energy
overhead, and success rate in line with interrogation
range. We foresee our endeavor to offer the viability
of OTA reprogramming and firmware upgrade for CRFID
tokens under practical situations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2018:NOC,
author = "Songyuan Li and Lingkun Fu and Shibo He and Youxian
Sun",
title = "Near-Optimal Co-Deployment of Chargers and Sink
Stations in Rechargeable Sensor Networks",
journal = j-TECS,
volume = "17",
number = "1",
pages = "10:1--10:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3070721",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Wireless charging technology has drawn great attention
of both academia and industry in recent years, due to
its potential of significantly improving the system
performance of sensor networks. The emergence of an
open-source experimental platform for wireless
rechargeable sensor networks, Powercast, has made the
theoretical research closer to reality. This pioneering
platform is able to recharge sensor nodes much more
efficiently and allows different communication
protocols to be implemented upon users' demands.
Different from the RFID-based model widely used in the
existing works, Powercast designs the charger and sink
station separately. This leads to a new design
challenge of cooperatively deploying minimum number of
chargers and sink stations in wireless rechargeable
sensor networks. Such a co-deployment issue is
extremely challenging, since the deployments of
chargers and sink stations are coupled, and each
subproblem is known to be NP-hard. The key to the
design is to understand the intrinsic relationship
between data flow and energy flow, which is
interdependent. In this article, we tackle this
challenge by dividing it into two subproblems and
optimizing charger and sink station deployment
iteratively. Specifically, we first transform each
subproblem to a max-flow problem. With this, we are
able to select chargers or sink stations according to
their contributions to the total flow rate. We design
greedy-based algorithms with a guaranteed worst-case
bound $ \ln R / \xi $ for the subproblems of charger
deployment and sink station deployment, respectively.
Further, we address the original problem by designing
an iterative algorithm that solves two subproblems
alternatively to achieve a near optimal performance. We
corroborate our analysis by extensive simulations under
practical coefficient settings and demonstrate the
advantage of the proposed algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "10",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wagemann:2018:OEN,
author = "Peter W{\"a}gemann and Tobias Distler and Heiko Janker
and Phillip Raffeck and Volkmar Sieh and Wolfgang
Schr{\"o}der-Preikschat",
title = "Operating Energy-Neutral Real-Time Systems",
journal = j-TECS,
volume = "17",
number = "1",
pages = "11:1--11:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3078631",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Energy-neutral real-time systems harvest the entire
energy they use from their environment. In such
systems, energy must be treated as an equally important
resource as time, which creates the need to solve a
number of problems that so far have not been addressed
by traditional real-time systems. In particular, this
includes the scheduling of tasks with both time and
energy constraints, the monitoring of energy budgets,
as well as the survival of blackout periods during
which not enough energy is available to keep the system
fully operational. In this article, we address these
issues presenting EnOS, an operating-system kernel for
energy-neutral real-time systems. EnOS considers mixed
time criticality levels for different energy
criticality modes, which enables a decoupling of time
and energy constraints when one is considered less
critical than the other. When switching the energy
criticality mode, the system also changes the set of
executed tasks and is therefore able to dynamically
adapt its energy consumption depending on external
conditions. By keeping track of the energy budget
available, EnOS ensures that in case of a blackout the
system state is safely stored to persistent memory,
allowing operations to resume at a later point when
enough energy is harvested again.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rajib:2018:PRI,
author = "MD. Majharul Islam Rajib and Asis Nasipuri",
title = "Predictive Retransmissions for Intermittently
Connected Sensor Networks with Transmission Diversity",
journal = j-TECS,
volume = "17",
number = "1",
pages = "12:1--12:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092947",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Batteryless wireless sensor networks that rely on
energy harvested from the environment often exhibit
random power outages due to limitations of energy
resources, which give rise to intermittent connectivity
and long transmission delays. To improve the delay
performance in such networks, we consider a design
strategy that uses predictive retransmissions to
maximize the probability of success for each
transmission. This is applied to two different
transmission diversity schemes: cooperative relaying
over unicast routes and opportunistic routing.
Performance evaluations from theoretical models and
simulations are presented that show that significant
gains can be achieved using the proposed approach in
such networks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xu:2018:GEP,
author = "Chi Xu and Wei Liang and Haibin Yu",
title = "Green-Energy-Powered Cognitive Radio Networks: Joint
Time and Power Allocation",
journal = j-TECS,
volume = "17",
number = "1",
pages = "13:1--13:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092949",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article studies a green-energy-powered cognitive
radio network (GCRN) in an underlay paradigm, wherein
multiple battery-free secondary users (SUs) capture
both the spectrum and the energy of primary users (PUs)
to communicate with an access point (AP). By time
division multiple access, each SU transmits data to AP
in the allocated time and harvests energy from the RF
signals of PUs otherwise, all in the same licensed
spectrum concurrently with PUs. Thus, the transmit
power of each SU is jointly constrained by the peak
interference power at PU and the harvested energy of
SU. With the formulated green coexistence paradigm, we
investigate the sum-throughput maximization problem
with respect to time and power allocation, which is
non-convex. To obtain the optimal resource allocation,
we propose a joint optimal time and power allocation
(JOTPA) algorithm that first transforms the original
problem into a convex optimization problem with respect
to time and energy allocation, and then solves it by
iterative Lagrange dual decomposition. To
comprehensively evaluate the performance of the GCRN
with JOTPA, we deploy the GCRN in three typical
scenarios and compare JOTPA with the equal time and
optimal power allocation (ETOPA) algorithm. Extensive
simulations show that the deployment of the GCRN
significantly influences the throughput performance and
JOTPA outperforms ETOPA under all considered
scenarios.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Eles:2018:GES,
author = "Petru Eles and J{\"o}rg Henkel",
title = "Guest Editorial for the Special Issue of {ESWEEK
2016}",
journal = j-TECS,
volume = "17",
number = "1",
pages = "14:1--14:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3152097",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hegde:2018:CAC,
author = "Gopalakrishna Hegde and Siddhartha and Nachiket
Kapre",
title = "{CaffePresso}: Accelerating Convolutional Networks on
Embedded {SoCs}",
journal = j-TECS,
volume = "17",
number = "1",
pages = "15:1--15:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3105925",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Auto-tuning and parametric implementation of deep
learning kernels allow off-the-shelf accelerator-based
embedded platforms to deliver high-performance and
energy-efficient mappings of the inference phase of
lightweight neural networks. Low-complexity classifiers
are characterized by operations on small image maps
with two to three deep layers and few class labels. For
these use cases, we consider a range of embedded
systems with 20W power budgets such as the Xilinx ZC706
(FPGA), NVIDIA Jetson TX1 (GPU), TI Keystone II (DSP),
and Adapteva Parallella (RISC+NoC). In CaffePresso, we
combine auto-tuning of the implementation parameters,
and platform-specific constraints to deliver optimized
solutions for each input ConvNet specification.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tan:2018:LLP,
author = "Cheng Tan and Aditi Kulkarni and Vanchinathan
Venkataramani and Manupa Karunaratne and Tulika Mitra
and Li-Shiuan Peh",
title = "{LOCUS}: Low-Power Customizable Many-Core Architecture
for Wearables",
journal = j-TECS,
volume = "17",
number = "1",
pages = "16:1--16:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3122786",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Application requirements, such as real-time response,
are pushing wearable devices to leverage more powerful
processors inside the SoC (system on chip). However,
existing wearable devices are not well suited for such
challenging applications due to poor performance, and
the conventional powerful many-core architectures are
not appropriate either due to the stringent power
budget in this domain. We propose LOCUS---a low-power,
customizable, many-core processor for next-generation
wearable devices. LOCUS combines customizable processor
cores with a customizable network on a message-passing
architecture to deliver very competitive
performance/watt---an average $ 3.1 \times $ compared to
quad-core ARM processors used in state-of-the-art
wearable devices. A combination of full system
simulation with representative applications from the
wearable domain and RTL synthesis of the architecture
show that 16-core LOCUS achieves an average $ 1.52
\times $ performance/watt improvement over a
conventional 16-core shared memory many-core
architecture. A dynamic power management mechanism is
proposed to further decrease the power consumption in
both computation and communication, which improves the
performance/watt of LOCUS by $ 1.17 \times $.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sutar:2018:DPI,
author = "Soubhagya Sutar and Arnab Raha and Devadatta Kulkarni
and Rajeev Shorey and Jeffrey Tew and Vijay
Raghunathan",
title = "{D-PUF}: an Intrinsically Reconfigurable {DRAM PUF}
for Device Authentication and Random Number
Generation",
journal = j-TECS,
volume = "17",
number = "1",
pages = "17:1--17:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3105915",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Physically Unclonable Functions (PUFs) have proved to
be an effective and low-cost measure against
counterfeiting by providing device authentication and
secure key storage services. Memory-based PUF
implementations are an attractive option due to the
ubiquitous nature of memory in electronic devices and
the requirement of minimal (or no) additional
circuitry. Dynamic Random Access Memory (DRAM)-based
PUFs are particularly advantageous due to their large
address space and multiple controllable parameters
during response generation. However, prior works on
DRAM PUFs use a static response-generation mechanism
making them vulnerable to security attacks. Further,
they result in slow device authentication, are not
applicable to commercial off-the-shelf devices, or
require DRAM power cycling prior to authentication. In
this article, we propose D-PUF, an intrinsically
reconfigurable DRAM PUF based on the idea of DRAM
refresh pausing. A key feature of the proposed DRAM PUF
is reconfigurability, that is, by varying the DRAM
refresh-pause interval, the challenge-response behavior
of the PUF can be altered, making it robust to various
attacks. The article is broadly divided into two parts.
In the first part, we demonstrate the use of D-PUF in
performing device authentication through a secure,
low-overhead methodology. In the second part, we show
the generation of true random numbers using D-PUF. The
design is implemented and validated using an Altera
Stratix IV GX FPGA-based Terasic TR4-230 development
board and several off-the-shelf 1GB DDR3 DRAM modules.
Our experimental results demonstrate a $ 4.3 \times
$--$ 6.4 \times $ reduction in authentication time
compared to prior work. Using controlled temperature
and accelerated aging tests, we also demonstrate the
robustness of our authentication mechanism to
temperature variations and aging effects. Finally, the
ability of the design to generate random numbers is
verified using the NIST Statistical Test Suite.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Guo:2018:IWP,
author = "Jie Guo and Chuhan Min and Tao Cai and Yiran Chen",
title = "Improving Write Performance and Extending Endurance of
Object-Based {NAND} Flash Devices",
journal = j-TECS,
volume = "17",
number = "1",
pages = "18:1--18:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3105924",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Write amplification is a major cause of performance
and endurance degradations in NAND flash-based storage
systems. In an object-based NAND flash device (ONFD),
two causes of write amplification are onode partial
update and cascading update. Here, onode is a type of
small-sized object metadata, and multiple onodes are
stored in one NAND flash page. Updating one onode
invokes partial page update (i.e., onode partial
update), incurring unnecessary migration of the
un-updated data. Cascading update denotes updating
object metadata in a cascading manner due to object
data update or migration. Although there are only
several bytes that need to be updated in the object
metadata, one or more pages have to be re-written
accordingly. In this work, we propose a system design
to alleviate the write amplification issue in the
object-based NAND flash device. The proposed design
includes (1) a multi-level garbage collection technique
to minimize unnecessary data migration incurred by
onode partial update and (2) a B+ table tree,
Semantics-Aware Flexible (SAF) data layout, and
selective cache design to reduce the write operations
associated with cascading update. To guarantee system
consistency, we also propose a power failure handling
technique. Experiment results show that our proposed
design can achieve up to 20\% write reduction compared
to the best states of the art.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Maier:2018:FIT,
author = "Petra R. Maier and Veit B. Kleeberger and Daniel
Mueller-Gritschneder and Ulf Schlichtmann",
title = "Fault Injection for Test-Driven Development of Robust
{SoC} Firmware",
journal = j-TECS,
volume = "17",
number = "1",
pages = "19:1--19:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092943",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Robustness against errors in hardware must be
considered from the very beginning of safety-critical
system-on-chip firmware design. Therefore, we present
fault injection for test-driven development (TDD) of
robust firmware. As TDD is based on instant feedback to
the designer, fault injection must execute within few
minutes. In contrast to state-of-the-art approaches, we
avoid long simulation scenarios and runtimes by
injecting faults at the unit level and utilizing
host-compiled simulation. Further, three static
bit-level analyses of firmware source code and hardware
specification reduce the fault set significantly. This
accelerates fault injection by several orders of
magnitude and enables robustness-aware TDD.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Balkan:2018:UFA,
author = "Ayca Balkan and Paulo Tabuada and Jyotirmoy V.
Deshmukh and Xiaoqing Jin and James Kapinski",
title = "{Underminer}: a Framework for Automatically
Identifying Nonconverging Behaviors in Black-Box System
Models",
journal = j-TECS,
volume = "17",
number = "1",
pages = "20:1--20:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3122787",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Evaluation of industrial embedded control system
designs is a time-consuming and imperfect process.
While an ideal process would apply a formal
verification technique such as model checking or
theorem proving, these techniques do not scale to
industrial design problems, and it is often difficult
to use these techniques to verify performance aspects
of control system designs, such as stability or
convergence. For industrial designs, engineers rely on
testing processes to identify critical or unexpected
behaviors. We propose a novel framework called
Underminer to improve the testing process; this is an
automated technique to identify nonconverging behaviors
in embedded control system designs. Underminer treats
the system as a black box and lets the designer
indicate the model parameters, inputs, and outputs that
are of interest. It differentiates convergent from
nonconvergent behaviors using Convergence Classifier
Functions (CCFs). The tool can be applied in the
context of testing models created late in the
controller development stage, where it assumes that the
given model displays mostly convergent behavior and
learns a CCF in an unsupervised fashion from such
convergent model behaviors. This CCF is then used to
guide a thorough exploration of the model with the help
of optimization-guided techniques or adaptive sampling
techniques, with the goal of identifying rare
nonconvergent model behaviors. Underminer can also be
used early in the development stage, where models may
have some significant nonconvergent behaviors. Here,
the framework permits designers to indicate their
mental model for convergence by labeling behaviors as
convergent/nonconvergent and then constructs a CCF
using a supervised learning technique. In this use
case, the goal is to use the CCF to test an improved
design for the model. Underminer supports a number of
convergence-like notions, such as those based on
Lyapunov analysis and temporal logic, and also CCFs
learned directly from labeled output behaviors using
machine-learning techniques such as support vector
machines and neural networks. We demonstrate the
efficacy of Underminer by evaluating its performance on
several academic as well as industrial examples.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fan:2018:SDR,
author = "Chuchu Fan and James Kapinski and Xiaoqing Jin and
Sayan Mitra",
title = "Simulation-Driven Reachability Using Matrix Measures",
journal = j-TECS,
volume = "17",
number = "1",
pages = "21:1--21:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3126685",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Simulation-driven verification can provide formal
safety guarantees for otherwise intractable nonlinear
and hybrid system models. A key step in
simulation-driven algorithms is to compute the reach
set overapproximations from a set of initial states
through numerical simulations and sensitivity analysis.
This article addresses this problem by providing
algorithms for computing discrepancy functions as the
upper bound on the sensitivity, that is, the rate at
which trajectories starting from neighboring states
converge or diverge. The algorithms rely on computing
local bounds on matrix measures as the exponential
change rate of the discrepancy function. We present two
techniques to compute the matrix measures under
different norms: regular Euclidean norm or Euclidean
norm under coordinate transformation, such that the
exponential rate of the discrepancy function, and
therefore, the conservativeness of the
overapproximation, is locally minimized. The proposed
algorithms enable automatic reach set computations of
general nonlinear systems and have been successfully
used on several challenging benchmark models. All
proposed algorithms for computing discrepancy functions
give soundness and relative completeness of the overall
simulation-driven safety-bounded verification
algorithm. We present a series of experiments to
illustrate the accuracy and performance of the
algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2018:PSC,
author = "Hyoseung Kim and Ragunathan (Raj) Rajkumar",
title = "Predictable Shared Cache Management for Multi-Core
Real-Time Virtualization",
journal = j-TECS,
volume = "17",
number = "1",
pages = "22:1--22:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092946",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Real-time virtualization has gained much attention for
the consolidation of multiple real-time systems onto a
single hardware platform while ensuring timing
predictability. However, a shared last-level cache
(LLC) on modern multi-core platforms can easily hamper
the timing predictability of real-time virtualization
due to the resulting temporal interference among
consolidated workloads. Since such interference caused
by the LLC is highly variable and may have not even
existed in legacy systems to be consolidated, it poses
a significant challenge for real-time virtualization.
In this article, we propose a predictable shared cache
management framework for multi-core real-time
virtualization. Our framework introduces two
hypervisor-level techniques, vLLC and vColoring, that
enable the cache allocation of individual tasks running
in a virtual machine (VM), which is not achievable by
the current state of the art. Our framework also
provides a cache management scheme that determines
cache allocation to tasks, designs VMs in a cache-aware
manner, and minimizes the aggregated utilization of VMs
to be consolidated. As a proof of concept, we
implemented vLLC and vColoring in the KVM hypervisor
running on x86 and ARM multi-core platforms.
Experimental results with three different guest OSs
(i.e., Linux/RK, vanilla Linux, and MS Windows
Embedded) show that our techniques can effectively
control the cache allocation of tasks in VMs. Our cache
management scheme yields a significant utilization
benefit compared to other approaches while satisfying
timing constraints.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kahkonen:2018:TPC,
author = "Kari K{\"a}hk{\"o}nen and Keijo Heljanko",
title = "Testing Programs with Contextual Unfoldings",
journal = j-TECS,
volume = "17",
number = "1",
pages = "23:1--23:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/2810000",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we present a new algorithm that
combines contextual unfoldings and dynamic symbolic
execution to systematically test multithreaded
programs. The approach uses symbolic execution to limit
the number of input values and unfoldings to thus limit
the number of thread interleavings that are needed to
cover reachable local states of threads in the program
under test. We show that the use of contextual
unfoldings allows interleavings of threads to be
succinctly represented. This can in some cases lead to
a substantial reduction in the number of needed test
executions when compared to previous approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gu:2018:EST,
author = "Xiaozhe Gu and Arvind Easwaran",
title = "Efficient Schedulability Test for Dynamic-Priority
Scheduling of Mixed-Criticality Real-Time Systems",
journal = j-TECS,
volume = "17",
number = "1",
pages = "24:1--24:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3105922",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Systems in many safety-critical application domains
are subject to certification requirements. In such a
system, there are typically different applications
providing functionalities that have varying degrees of
criticality. Consequently, the certification
requirements for functionalities at these different
criticality levels are also varying, with very high
levels of assurance required for a highly critical
functionality, whereas relatively low levels of
assurance are required for a less critical
functionality. Considering the timing assurance given
to various applications in the form of guaranteed
budgets within deadlines, a theory of real-time
scheduling for such multi-criticality systems has been
recently under development. In particular, an algorithm
called Earliest Deadline First with Virtual Deadlines
(EDF-VD) has shown a lot of promise for systems with
two criticality levels, especially in terms of
practical performance demonstrated through experiment
results. In this article, we design a new
schedulability test for EDF-VD that extends these
performance benefits to multi-criticality systems. We
propose a new test based on demand bound functions and
also present a novel virtual deadline assignment
strategy. Through extensive experiments, we show that
the proposed technique significantly outperforms
existing strategies for a variety of generic real-time
systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kulkarni:2018:LOC,
author = "Amey Kulkarni and Colin Shea and Tahmid Abtahi and
Houman Homayoun and Tinoosh Mohsenin",
title = "Low Overhead {CS}-Based Heterogeneous Framework for
Big Data Acceleration",
journal = j-TECS,
volume = "17",
number = "1",
pages = "25:1--25:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092944",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Big data processing on hardware gained immense
interest among the hardware research community to take
advantage of fast processing and reconfigurability.
Though the computation latency can be reduced using
hardware, big data processing cost is dominated by data
transfers. In this article, we propose a low overhead
framework based on compressive sensing (CS) to reduce
data transfers up to 67\% without affecting signal
quality. CS has two important kernels: ``sensing'' and
``reconstruction.'' In this article, we focus on CS
reconstruction using orthogonal matching pursuit
(OMP) algorithm. We implement the OMP CS reconstruction
algorithm on a domain-specific PENC many-core platform
and a low-power Jetson TK1 platform consisting of an
ARM CPU and a K1 GPU. Detailed performance analysis of
OMP algorithm on each platform suggests that the PENC
many-core platform has $ 15 \times $ and $ 18 \times $
less energy consumption and $ 16 \times $ and $ 8
\times $ faster reconstruction time as compared to the
low-power ARM CPU and K1 GPU, respectively.
Furthermore, we implement the proposed CS-based
framework on heterogeneous architecture, in which the
PENC many-core architecture is used as an
``accelerator'' and processing is performed on the ARM
CPU platform. For demonstration, we integrate the
proposed CS-based framework with a Hadoop MapReduce
platform for a face detection application. The results
show that the proposed CS-based framework with the PENC
many-core as an accelerator achieves a 26.15\% data
storage/transfer reduction, with an execution time and
energy consumption overhead of 3.7\% and 0.002\%,
respectively, for 5,000 image transfers. Compared to
the CS-based framework implementation on the low-power
Jetson TK1 ARM CPU+GPU platform, the PENC many-core
implementation is $ 2.3 \times $ faster for the image
reconstruction part, while achieving 29\% higher
performance and 34\% better energy efficiency for the
complete face detection application on the Hadoop
MapReduce platform.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nodeh:2018:EAM,
author = "Mohammad Taghi Teimoori Nodeh and Mostafa Bazzaz and
Alireza Ejlali",
title = "Exploiting Approximate {MLC-PCM} in Low-Power Embedded
Systems",
journal = j-TECS,
volume = "17",
number = "1",
pages = "26:1--26:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3105926",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Multi-level cell phase change memory (MLC-PCM),
because of its very low leakage power and high density,
is promising for embedded systems. Furthermore, for
applications with inherent low sensitivity to errors,
approximate write operations can be exploited in
MLC-PCM to improve endurance and performance. However,
data that reside in the approximate MLC-PCM for a
rather long time without refreshing are prone to soft
errors due to resistance drift phenomenon, while even
for an application with inherent low sensitivity to
errors, a high soft error rate can degrade its Quality
of Result (QoR). The architecture-level approaches to
decrease the drift effect incur considerable power
overhead (about 100\%), which is a prominent issue in
embedded systems, and are dependent on the number of
logic levels stored in the PCM cell (e.g., most of them
are designed for 4LC-PCM). This article, taking a
different approach, proposes a drift-aware frequency
and voltage management to alleviate the drift-based
soft-error rate. To this end, first we characterize the
application data based on the degree of being exposed
to the drift to identify the drift-prone application
data. Then we assign the execution frequency and
voltage to different regions of the application
considering the drift. This frequency assignment speeds
up the application regions wherein the drift-prone data
are accessed to shorten the lifetime of the drift-prone
data, thereby decreasing the soft error rate. An
integer linear programming model implements our
proposed Dynamic Voltage Frequency Scaling (DVFS).
Also, the proposed approach is independent of the
number of levels of PCM cells and can be applied to any
MLC-PCM system. To evaluate the approach, the
approximate MLC-PCM is simulated using empirical models
and is integrated into a full-system simulator as data
memory. The experimental results show that, by
exploiting the approach, QoR is in the acceptable
range, while its power overhead is about 84\% (on
average) less than that of the architecture-level
approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gaglio:2018:DPD,
author = "Salvatore Gaglio and Giuseppe {Lo Re} and Gloria
Martorella and Daniele Peri",
title = "{DC4CD}: a Platform for Distributed Computing on
Constrained Devices",
journal = j-TECS,
volume = "17",
number = "1",
pages = "27:1--27:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3105923",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, we present Distributed Computing for
Constrained Devices (DC4CD), a novel software
architecture that supports symbolic distributed
computing on wireless sensor networks. DC4CD integrates
the functionalities of a high-level symbolic
interpreter, a compiler, and an operating system, and
includes networking abstractions to exchange high-level
symbolic code among peer devices. Contrarily to other
architectures proposed in the literature, DC4CD allows
for changes at runtime, even on deployed nodes of both
application and system code. Experimental results show
that DC4CD is more efficient in terms of memory usage
than existing architectures, with which it also
compares well in terms of execution efficiency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Namazi:2018:MBR,
author = "Alireza Namazi and Meisam Abdollahi and Saeed Safari
and Siamak Mohammadi",
title = "A Majority-Based Reliability-Aware Task Mapping in
High-Performance Homogeneous {NoC} Architectures",
journal = j-TECS,
volume = "17",
number = "1",
pages = "28:1--28:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3131273",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article presents a new reliability-aware task
mapping approach in a many-core platform at design time
for applications with DAG-based task graphs. The main
goal is to devise a task mapping which meets a
predefined reliability threshold considering a
minimized performance degradation. The proposed
approach uses a majority-voting replication technique
to fulfill error-masking capability. A quantitative
reliability model is also proposed for the platform.
Our platform is a homogeneous many-core architecture
with mesh-based interconnection using traditional
deterministic XY routing algorithm. Our iterative
approach is applicable to an unlimited number of system
fault types. All parts of the platform, including
cores, links, and routers, are assumed to be prone to
failures. We used the MNLP optimization technique to
find the optimal mapping of the presented task graph.
Experimental results show that our suggested task
mappings not only comply with predefined reliability
thresholds but also achieve notable time complexity
reduction with respect to exhaustive space
exploration.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2018:EIC,
author = "Sandeep K. Shukla",
title = "Editorial: {Industry 4.0} --- a Confluence of Embedded
Artificial Intelligence, Machine Learning, Robotics and
Security",
journal = j-TECS,
volume = "17",
number = "2",
pages = "29:1--29:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3194944",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "29",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Leonard:2018:GES,
author = "Elizabeth Leonard",
title = "Guest Editorial: Special Issue on Formal Methods and
Models for System Design",
journal = j-TECS,
volume = "17",
number = "2",
pages = "30:1--30:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3162079",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tanase:2018:SML,
author = "Alexandru Tanase and Michael Witterauf and J{\"u}rgen
Teich and Frank Hannig",
title = "Symbolic Multi-Level Loop Mapping of Loop Programs for
Massively Parallel Processor Arrays",
journal = j-TECS,
volume = "17",
number = "2",
pages = "31:1--31:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092952",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Today's MPSoCs (multiprocessor systems-on-chip) have
brought up massively parallel processor array
accelerators that may achieve a high computational
efficiency by exploiting multiple levels of parallelism
and different memory hierarchies. Such parallel
processor arrays are perfect targets, particularly for
the acceleration of nested loop programs due to their
regular and massively parallel nature. However,
existing loop parallelization techniques are often
unable to exploit multiple levels of parallelism and
are either I/O or memory bounded. Furthermore, if the
number of available processing elements becomes only
known at runtime---as in adaptive systems---static
approaches fail. In this article, we solve some of
these problems by proposing a hybrid compile/runtime
multi-level symbolic parallelization technique that is
able to: (a) exploit multiple levels of parallelism as
well as (b) different memory hierarchies, and (c) to
match the I/O or memory capabilities of the target
architecture for scenarios where the number of
available processing elements is only known at runtime.
Our proposed technique consists of two compile-time
transformations: (a) symbolic hierarchical tiling
followed by (b) symbolic multi-level scheduling. The
tiling levels scheduled in parallel exploit different
levels of parallelism, whereas the sequential one,
different memory hierarchies. Furthermore, by tuning
the size of the tiles on the individual levels, a
tradeoff between the necessary I/O-bandwidth and memory
is possible, which facilitates obeying resource
constraints. The resulting schedules are symbolic with
respect to the problem size and tile sizes. Thus, the
number of processing elements to map onto does not need
to be known at compile time. At runtime, when the
number of available processors becomes known, a simple
prologue chooses a feasible schedule with respect to
I/O and memory constraints that is latency-optimal for
the chosen tile size. In summary, our approach
determines the set of feasible, latency-optimal
symbolic loop schedule candidates at compile time, from
which one is dynamically selected at runtime. This
approach exploits multiple levels of parallelism, is
independent of the problem size of the loop nest, and
thereby avoids any expensive re-compilation at runtime.
This is particularly important for low cost and
memory-scarce embedded MPSoC platforms that may not
afford to host a just-in-time compiler.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "31",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Attie:2018:MPR,
author = "Paul C. Attie and Kinan Dak {Al Bab} and Mouhammad
Sakr",
title = "Model and Program Repair via {SAT} Solving",
journal = j-TECS,
volume = "17",
number = "2",
pages = "32:1--32:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3147426",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "We consider the subtractive model repair problem:
given a finite Kripke structure M and a CTL formula $
\eta $, determine if M contains a substructure $ M' $
that satisfies $ \eta $. Thus, M can be ``repaired'' to
satisfy $ \eta $ by deleting some transitions and states. We
map an instance $ \langle M, \eta \rangle $ of model
repair to a Boolean formula repair $ (M, \eta)$ such
that $ \langle M, \eta \rangle $ has a solution iff
repair $ (M, \eta)$ is satisfiable. Furthermore, a
satisfying assignment determines which states and
transitions must be removed from $M$ to yield a model $
M' $ of $ \eta $. Thus, we can use any SAT solver to
repair Kripke structures. Using a complete SAT solver
yields a complete algorithm: it always finds a repair
if one exists. We also show that CTL model repair is
NP-complete. We extend the basic repair method in three
directions: (1) the use of abstraction mappings, that
is, repair a structure abstracted from M and then
concretize the resulting repair to obtain a repair of
M, (2) repair concurrent Kripke structures and
concurrent programs: we use the pairwise method of
Attie and Emerson to represent and repair the behavior
of a concurrent program, as a set of ``concurrent
Kripke structures'', with only a quadratic increase in
the size of the repair formula, and (3) repair
hierarchical Kripke structures: we use a CTL formula to
summarize the behavior of each ``box,'' and CTL
deduction to relate the box formula with the overall
specification.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "32",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Smyth:2018:SSC,
author = "Steven Smyth and Christian Motika and Karsten Rathlev
and Reinhard von Hanxleden and Michael Mendler",
title = "{SCEst}: Sequentially Constructive {Esterel}",
journal = j-TECS,
volume = "17",
number = "2",
pages = "33:1--33:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3063129",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The synchronous language Esterel provides determinate
concurrency for reactive systems. Determinacy is
ensured by the signal coherence rule, which demands
that signals have a stable value throughout one
reaction cycle. This is natural for the original
application domains of Esterel, such as controller
design and hardware development; however, it is
unnecessarily restrictive for software development.
Sequentially Constructive Esterel (SCEst) overcomes
this restriction by allowing values to change
instantaneously, as long as determinacy is still
guaranteed, adopting the recently proposed Sequentially
Constructive model of computation. SCEst is grounded in
the minimal Sequentially Constructive Language (SCL),
which also provides a novel semantic definition and
compilation approach for Esterel.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dokhanchi:2018:FRD,
author = "Adel Dokhanchi and Bardh Hoxha and Georgios Fainekos",
title = "Formal Requirement Debugging for Testing and
Verification of Cyber-Physical Systems",
journal = j-TECS,
volume = "17",
number = "2",
pages = "34:1--34:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3147451",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "A framework for the elicitation and debugging of
formal specifications for Cyber-Physical Systems is
presented. The elicitation of specifications is handled
through a graphical interface. Two debugging algorithms
are presented. The first checks for erroneous or
incomplete temporal logic specifications without
considering the system. The second can be utilized for
the analysis of reactive requirements with respect to
system test traces. The specification debugging
framework is applied on a number of formal
specifications collected through a user study. The user
study establishes that requirement errors are common
and that the debugging framework can resolve many
insidious specification errors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2018:FPS,
author = "Zheng Li and Shuibing He",
title = "Fixed-Priority Scheduling for Two-Phase
Mixed-Criticality Systems",
journal = j-TECS,
volume = "17",
number = "2",
pages = "35:1--35:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3105921",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "In this article, a two-phase execution model is
proposed for mixed-criticality (MC) tasks. Different
from traditional MC tasks with a computation phase
only, the two-phase execution model requires a
memory-access phase first to fetch the instructions and
data, and then computation. Theoretical foundations are
first established for a schedulability test under given
memory-access and computation priority assignment.
Based on the established theoretical conclusions, a
two-stage priority assignment algorithm, which can find
the best priority assignment for both memory-access and
computation phases under fixed-priority scheduling, is
further developed. Extensive experiments have been
conducted and the experimental results validate the
effectiveness of our proposed approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "35",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liang:2018:EVL,
author = "Lihao Liang and Tom Melham and Daniel Kroening and
Peter Schrammel and Michael Tautschnig",
title = "Effective Verification for Low-Level Software with
Competing Interrupts",
journal = j-TECS,
volume = "17",
number = "2",
pages = "36:1--36:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3147432",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Interrupt-driven software is difficult to test and
debug, especially when interrupts can be nested and
subject to priorities. Interrupts can arrive at
arbitrary times, leading to an exponential blow-up in
the number of cases to consider. We present a new
formal approach to verifying interrupt-driven software
based on symbolic execution. The approach leverages
recent advances in the encoding of the execution traces
of interacting, concurrent threads. We assess the
performance of our method on benchmarks drawn from
embedded systems code and device drivers, and
experimentally compare it to conventional approaches
that use source-to-source transformations. Our results
show that our method significantly outperforms these
techniques. To the best of our knowledge, our work is
the first to demonstrate effective verification of
low-level embedded software with nested interrupts.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xie:2018:ESA,
author = "Xinfeng Xie and Dayou Du and Qian Li and Yun Liang and
Wai Teng Tang and Zhong Liang Ong and Mian Lu and Huynh
Phung Huynh and Rick Siow Mong Goh",
title = "Exploiting Sparsity to Accelerate Fully Connected
Layers of {CNN}-Based Applications on Mobile {SoCs}",
journal = j-TECS,
volume = "17",
number = "2",
pages = "37:1--37:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3122788",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Convolutional neural networks (CNNs) are widely
employed in many image recognition applications. With
the proliferation of embedded and mobile devices, such
applications are becoming commonplace on mobile
devices. Network pruning is a commonly used strategy to
reduce the memory and storage footprints of CNNs on
mobile devices. In this article, we propose customized
versions of the sparse matrix multiplication algorithm
to speed up inference on mobile devices and make it
more energy efficient. Specifically, we propose a Block
Compressed Sparse Column algorithm and a
bit-representation-based algorithm (BitsGEMM) that
exploit sparsity to accelerate the fully connected
layers of a network on the NVIDIA Jetson TK1 platform.
We evaluate the proposed algorithms using real-world
object classification and object detection
applications. Experiments show that performance
speedups can be achieved over the original baseline
implementation using cuBLAS. On object detection CNNs,
an average speedup of $ 1.82 \times $ is obtained over
baseline cuBLAS in the fully connected layer of the VGG
model, whereas on classification CNNs, an average
speedup of $ 1.51 \times $ is achieved for the fully
connected layer of the pruned-VGG model. Energy
consumption reduction of 43--46\% is also observed due
to decreased computational and memory bandwidth
demands.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lu:2018:TSI,
author = "Sixing Lu and Roman Lysecky",
title = "Time and Sequence Integrated Runtime Anomaly Detection
for Embedded Systems",
journal = j-TECS,
volume = "17",
number = "2",
pages = "38:1--38:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3122785",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Network-connected embedded systems grow on a large
scale as a critical part of Internet of Things, and
these systems are under the risk of increasing malware.
Anomaly-based detection methods can detect malware in
embedded systems effectively and provide the advantage
of detecting zero-day exploits relative to
signature-based detection methods, but existing
approaches incur significant performance overheads and
are susceptible to mimicry attacks. In this article, we
present a formal runtime security model that defines
the normal system behavior including execution sequence
and execution timing. The anomaly detection method in
this article utilizes on-chip hardware to
non-intrusively monitor system execution through trace
port of the processor and detect malicious activity at
runtime. We further analyze the properties of the
timing distribution for control flow events, and select
subset of monitoring targets by three selection metrics
to meet hardware constraint. The designed detection
method is evaluated by a network-connected pacemaker
benchmark prototyped in FPGA and simulated in SystemC,
with several mimicry attacks implemented at different
levels. The resulting detection rate and false positive
rate considering constraints on the number of monitored
events supported in the on-chip hardware demonstrate
good performance of our approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ballabriga:2018:SWC,
author = "Cl{\'e}ment Ballabriga and Julien Forget and Giuseppe
Lipari",
title = "Symbolic {WCET} Computation",
journal = j-TECS,
volume = "17",
number = "2",
pages = "39:1--39:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3147413",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Parametric Worst-case execution time (WCET) analysis
of a sequential program produces a formula that
represents the worst-case execution time of the
program, where parameters of the formula are
user-defined parameters of the program (as loop bounds,
values of inputs, or internal variables, etc.). In this
article we propose a novel methodology to compute the
parametric WCET of a program. Unlike other algorithms
in the literature, our method is not based on Integer
Linear Programming (ILP). Instead, we follow an
approach based on the notion of symbolic computation of
WCET formulae. After explaining our methodology and
proving its correctness, we present a set of
experiments to compare our method against the state of
the art. We show that our approach dominates other
parametric analyses and produces results that are very
close to those produced by non-parametric ILP-based
approaches, while keeping very good computing time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dutt:2018:ADA,
author = "Sunil Dutt and Sukumar Nandi and Gaurav Trivedi",
title = "Analysis and Design of Adders for Approximate
Computing",
journal = j-TECS,
volume = "17",
number = "2",
pages = "40:1--40:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3131274",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The concept of approximate computing, that is, to
sacrifice computation quality for computation efforts,
has recently emerged as a promising design approach.
Over the past decade, several research works have
explored approximate computing at both the software
level and hardware level of abstraction with
encouraging results. At the hardware level of
abstraction, adders (being the fundamental and most
widely used data operators in digital systems) have
attracted a significant attention for approximation. In
this article, we first explain briefly the
need/significance of approximate adders. We then
propose four Approximate Full Adders (AFAs) for
high-performance energy-efficient approximate
computing. The key design objective behind the proposed
AFAs is to curtail the length of carry propagation
subjected to minimal error rate. Next, we exploit one
of the proposed AFAs (optimal one) to construct an
N-bit approximate adder that hereinafter is referred as
``ApproxADD.'' An emergent property of ApproxADD is
that carries do not propagate in it, and, consequently,
it provides bit-width-aware constant delay (O(1)).
ApproxADD also provides improvement in dynamic power
consumption by 46.31\% and in area by 28.57\% w.r.t.
Ripple Carry Adder (RCA), which exhibits the lowest
power and area. Although ApproxADD provides a
significant improvement in delay, power, and area, it
may not be preferred for some of the error-resilient
applications because its: (i) Error Distance (ED) is
too high; and (ii) Error Rate (ER) increases rapidly
with bit-width ($N$). To improve ED and ER, we exploit
the concept of carry-lifetime and Error Detection and
Correction logic, respectively. In this way, we
introduce two more (improved) versions of
ApproxADD---ApproxADD $ \upsilon $ 1 and ApproxADD $
\upsilon $ 2. We call these as ApproxADD $ \upsilon $ 1
and ApproxADD $
\upsilon $ 2 with existing approximate adders based on
conventional design metrics and approximate computing
design metrics. Furthermore, to inspect effectiveness
of the proposed approach in real-life applications, we
demonstrate image compression and decompression by
replacing the conventional addition operations in
Discrete Cosine Transform (DCT) and Inverse Discrete
Cosine Transform (IDCT) modules with ApproxADD $
\upsilon $ 2.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Leech:2018:RPP,
author = "Charles Leech and Charan Kumar and Amit Acharyya and
Sheng Yang and Geoff V. Merrett and Bashir M.
Al-Hashimi",
title = "Runtime Performance and Power Optimization of Parallel
Disparity Estimation on Many-Core Platforms",
journal = j-TECS,
volume = "17",
number = "2",
pages = "41:1--41:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3133560",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "This article investigates the use of many-core systems
to execute the disparity estimation algorithm, used in
stereo vision applications, as these systems can
provide flexibility between performance scaling and
power consumption. We present a learning-based runtime
management approach that achieves a required
performance threshold while minimizing power
consumption through dynamic control of frequency and
core allocation. Experimental results are obtained from
a 61-core Intel Xeon Phi platform for the
aforementioned investigation. The same performance can
be achieved with an average reduction in power
consumption of 27.8\% and increased energy efficiency
by 30.04\% when compared to Dynamic Voltage and
Frequency Scaling control alone without runtime
management.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2018:FRT,
author = "Ganghee Lee and Ediz Cetin and Oliver Diessel",
title = "Fault Recovery Time Analysis for Coarse-Grained
Reconfigurable Architectures",
journal = j-TECS,
volume = "17",
number = "2",
pages = "42:1--42:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3140944",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Coarse-grained reconfigurable architectures (CGRAs)
have drawn increasing attention due to their
performance and flexibility advantages. Typically,
CGRAs incorporate many processing elements in the form
of an array, which is suitable for implementing spatial
redundancy, as used in the design of fault-tolerant
systems. This article introduces a recovery time model
for transient faults in CGRAs. The proposed
fault-tolerant CGRAs are based on triple modular
redundancy and coding techniques for error detection
and correction. To evaluate the model, several kernels
from space computing are mapped onto the suggested
architecture. We demonstrate the tradeoff between
recovery time, performance, and area. In addition, the
average execution time of an application including
recovery time is evaluated using area-based error-rate
estimates in harsh radiation environments. The results
show that task partitioning is important for bounding
the recovery time of applications that have long
execution times. It is also shown that error-correcting
code (ECC) is of limited practical value for tasks with
long execution times in high radiation environments, or
when the degree of task partitioning is high.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Harrison:2018:CPR,
author = "David C. Harrison and Winston K. G. Seah and Ramesh
Rayudu",
title = "Coverage Preservation with Rapid Forwarding in
Energy-Harvesting Wireless Sensor Networks for Critical
Rare Events",
journal = j-TECS,
volume = "17",
number = "2",
pages = "43:1--43:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3140961",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Wireless sensor networks for rarely occurring critical
events must maintain sensing coverage and low-latency
network connectivity to ensure event detection and
subsequent rapid propagation of notification messages.
Few algorithms have been proposed that address both
coverage and forwarding and those that do are either
unconcerned with rapid propagation or are not optimised
to handle the constant changes in topology observed in
duty-cycled networks. This article proposes an
algorithm for Coverage Preservation with Rapid
Forwarding (CPRF). The algorithm is shown to deliver
perfect coverage maintenance and low-latency guaranteed
message propagation whilst allowing stored-charge
conservation via collaborative duty cycling in
energy-harvesting networks. Favourable comparisons are
made against established and recently proposed
algorithms in both sparse planned and dense random
distributions. Further, an implementation for
commercially available wireless sensing devices is
evaluated for detection and notification of damage to
highway light poles caused by vortex shedding.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2018:ECB,
author = "He Li and Kaoru Ota and Mianxiong Dong",
title = "Energy Cooperation in Battery-Free Wireless
Communications with Radio Frequency Energy Harvesting",
journal = j-TECS,
volume = "17",
number = "2",
pages = "44:1--44:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3141249",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Radio frequency (RF) energy harvesting techniques are
becoming a potential method to power battery-free
wireless networks. In RF energy harvesting
communications, energy cooperation enables shaping and
optimization of the energy arrivals at the
energy-receiving node to improve the overall system
performance. In this article, we propose an energy
cooperation scheme that enables energy cooperation in
battery-free wireless networks with RF harvesting. We
first study the battery-free wireless network with RF
energy harvesting and then state the problem that
optimizing the system performance with limited
harvesting energy through new energy cooperation
protocol. Finally, from the extensive simulation
results, our energy cooperation protocol performs
better than the original battery-free wireless network
solution.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Park:2018:SCG,
author = "Jurn-Gyu Park and Chen-Ying Hsieh and Nikil Dutt and
Sung-Soo Lim",
title = "Synergistic {CPU--GPU} Frequency Capping for
Energy-Efficient Mobile Games",
journal = j-TECS,
volume = "17",
number = "2",
pages = "45:1--45:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3145337",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Mobile platforms are increasingly using Heterogeneous
Multiprocessor Systems-on-Chip (HMPSoCs) with
differentiated processing cores and GPUs to achieve
high performance for graphics-intensive applications
such as mobile games. Traditionally, separate CPU and
GPU governors are deployed in order to achieve energy
efficiency through Dynamic Voltage Frequency Scaling
(DVFS) but miss opportunities for further energy
savings through coordinated system-level application of
DVFS. We present a cooperative CPU--GPU DVFS strategy
(called Co-Cap) that orchestrates energy-efficient CPU
and GPU DVFS through synergistic CPU and GPU frequency
capping to avoid frequency overprovisioning while
maintaining desired performance. Unlike traditional
approaches that target a narrow set of mobile games,
our Co-Cap approach is applicable across a wide range
of microbenchmarks and mobile games. Our methodology
employs a systematic training phase using fine-grained
refinement steps with evaluations of frequency capping
tables followed by a deployment phase, allowing
deployment across a wide range of microbenchmarks and
mobile games with varying graphics workloads. Our
experimental results across multiple sets of over 200
microbenchmarks and 40 mobile games show that Co-Cap
improves energy per frame by on average 8.9\% (up to
18.3\%) and 7.8\% (up to 27.6\%) (16.6\% and 15.7\% in
CPU-dominant applications) and achieves minimal
frames-per-second (FPS) loss by 0.9\% and 0.85\% (1.3\%
and 1.5\% in CPU-dominant applications) on average in
training and deployment sets, respectively, compared to
the default CPU and GPU governors, with negligible
overhead in execution time and power consumption on the
ODROID-XU3 platform.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Narayan:2018:MTR,
author = "Apurva Narayan and Greta Cutulenco and Yogi Joshi and
Sebastian Fischmeister",
title = "Mining Timed Regular Specifications from System
Traces",
journal = j-TECS,
volume = "17",
number = "2",
pages = "46:1--46:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3147660",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Temporal properties define the order of occurrence and
timing constraints on event occurrence. Such
specifications are important for safety-critical
real-time systems. We propose a framework for
automatically mining temporal properties that are in
the form of timed regular expressions (TREs) from
system traces. Using an abstract structure of the
property, the framework constructs a finite state
machine to serve as an acceptor. We analytically derive
speedup for the fragment and confirm the speedup using
empirical validation with synthetic traces. The
framework is evaluated on industrial-strength
safety-critical real-time applications using traces
with more than 1 million entries.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shoushtari:2018:SIS,
  author = {Majid Shoushtari and Bryan Donyanavard and Luis Angel D. Bathen and Nikil Dutt},
  title = {{ShaVe-ICE}: Sharing Distributed Virtualized {SPMs} in Many-Core Embedded Systems},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {47:1--47:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3157667},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Traditional approaches for managing
    software-programmable memories (SPMs) do not support
    sharing of distributed on-chip memory resources and,
    consequently, miss the opportunity to better utilize
    those memory resources. Managing on-chip memory
    resources in many-core embedded systems with
    distributed SPMs requires runtime support to share
    memory resources between various threads with different
    memory demands running concurrently. Runtime SPM
    managers cannot rely on prior knowledge about the
    dynamically changing mix of threads that will execute
    and therefore should be designed in a way that enables
    SPM allocations for any unpredictable mix of threads
    contending for on-chip memory space. This article
    proposes ShaVe-ICE, an operating-system-level solution,
    along with hardware support, to virtualize and
    ultimately share SPM resources across a many-core
    embedded system to reduce the average memory latency.
    We present a number of simple allocation policies to
    improve performance and energy. Experimental results
    show that sharing SPMs could reduce the average
    execution time of the workload up to 19.5\% and reduce
    the dynamic energy consumed in the memory subsystem up
    to 14\%.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {47},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{He:2018:AFI,
  author = {Zhijian He and Yao Chen and Zhaoyan Shen},
  title = {Attitude Fusion of Inertial and Magnetic Sensor under Different Magnetic Filed Distortions},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {48:1--48:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3157668},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {By virtue of gravity measurement from a handheld
    inertial measurement unit (IMU) sensor, current indoor
    attitude estimation algorithms can provide accurate
    roll/pitch dimension angles. Acquisition of precise
    heading is limited by the absence of accurate magnetic
    reference. Consequently, initial stage magnetometer
    calibration is deployed to alleviate this bottleneck in
    attitude fusion. However, available algorithms tackle
    magnetic distortion based on time-invariant
    surroundings, casting the post-calibration magnetic
    data into unchanged ellipsoid centered in the
    calibration place. Consequently, inaccurate fusion
    results are formulated in a more common case of random
    walk in time-varying magnetic indoor environment. This
    article proposes a new fusion algorithm from various
    kinds of IMU sensors, namely gyroscope, accelerometer,
    and magnetometer. Compared to state-of-the-art attitude
    fusion approaches, this article addresses the indoor
    time-varying magnetic perturbation problem in a
    geometric view. We propose an extend Kalman
    filter--based algorithm based on this detailed
    geometric model to eliminate the position-dependent
    effect of a compass sensor. Experimental data
    demonstrate that, under different indoor magnetic
    distortion environments, our proposed attitude fusion
    algorithm has the maximum angle error of 2.02${}^\circ
    $, outperforming 7.17${}^\circ $ of a
    gradient-declining-based algorithm. Additionally, this
    attitude fusion result is constructed in a low-cost
    handheld arduino core--based IMU device, which can be
    widely applied to embedded systems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {48},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Bhattacharjee:2018:CRM,
  author = {Sukanta Bhattacharjee and Yi-Ling Chen and Juinn-Dar Huang and Bhargab B. Bhattacharya},
  title = {Concentration-Resilient Mixture Preparation with Digital Microfluidic Lab-on-Chip},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {49:1--49:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3157094},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Sample preparation plays a crucial role in almost all
    biochemical applications, since a predominant portion
    of biochemical analysis time is associated with sample
    collection, transportation, and preparation. Many
    sample-preparation algorithms are proposed in the
    literature that are suitable for execution on
    programmable digital microfluidic (DMF) platforms. In
    most of the existing DMF-based sample-preparation
    algorithms, a fixed target ratio is provided as input,
    and the corresponding mixing tree is generated as
    output. However, in many biochemical applications,
    target mixtures with exact component proportions may
    not be needed. From a biochemical perspective, it may
    be sufficient to prepare a mixture in which the input
    reagents may lie within a range of concentration
    factors. The choice of a particular valid ratio,
    however, strongly impacts solution-preparation cost and
    time. To address this problem, we propose a
    concentration-resilient ratio-selection method from the
    input ratio space so that the reactant cost is
    minimized. We propose an integer linear
    programming--based method that terminates very fast
    while producing the optimum solution, considering both
    uniform and weighted cost of reagents. Experimental
    results reveal that the proposed method can be used
    conveniently in tandem with several existing
    sample-preparation algorithms for improving their
    performance.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {49},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Lin:2018:MCV,
  author = {Shuoxin Lin and Jiahao Wu and Shuvra S. Bhattacharyya},
  title = {Memory-Constrained Vectorization and Scheduling of Dataflow Graphs for Hybrid {CPU--GPU} Platforms},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {50:1--50:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3157669},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The increasing use of heterogeneous embedded systems
    with multi-core CPUs and Graphics Processing Units
    (GPUs) presents important challenges in effectively
    exploiting pipeline, task, and data-level parallelism
    to meet throughput requirements of digital signal
    processing applications. Moreover, in the presence of
    system-level memory constraints, hand optimization of
    code to satisfy these requirements is inefficient and
    error prone and can therefore, greatly slow down
    development time or result in highly underutilized
    processing resources. In this article, we present
    vectorization and scheduling methods to effectively
    exploit multiple forms of parallelism for throughput
    optimization on hybrid CPU-GPU platforms, while
    conforming to system-level memory constraints. The
    methods operate on synchronous dataflow
    representations, which are widely used in the design of
    embedded systems for signal and information processing.
    We show that our novel methods can significantly
    improve system throughput compared to previous
    vectorization and scheduling approaches under the same
    memory constraints. In addition, we present a practical
    case-study of applying our methods to significantly
    improve the throughput of an orthogonal frequency
    division multiplexing receiver system for wireless
    communications.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {50},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Huang:2018:HPH,
  author = {Tian Huang and Yongxin Zhu and Yajun Ha and Xu Wang and Meikang Qiu},
  title = {A Hardware Pipeline with High Energy and Resource Efficiency for {FMM} Acceleration},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {51:1--51:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3157670},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The fast multipole method (FMM) is a promising
    mathematical technique that accelerates the calculation
    of long-ranged forces in the large-sized n-body
    problem. Existing implementations of the FMM on
    general-purpose processors are energy and resource
    inefficient. To mitigate these issues, we propose a
    hardware pipeline that accelerates three key FMM steps.
    The pipeline improves energy efficiency by exploiting
    fine-granularity parallelism of the FMM. We reuse the
    pipeline for different FMM steps to reduce resource
    usage by 66\%. Compared to the state-of-the-art
    implementations on CPUs and GPUs, our implementation
    requires 15\% less energy and delivers 2.61 times more
    floating-point operations.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {51},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Qian:2018:ECD,
  author = {Kun Qian and Chenshu Wu and Zheng Yang and Yunhao Liu and Fugui He and Tianzhang Xing},
  title = {Enabling Contactless Detection of Moving Humans with Dynamic Speeds Using {CSI}},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {52:1--52:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3157677},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Device-free passive detection is an emerging
    technology to detect whether there exist any moving
    entities in the areas of interest without attaching any
    device to them. It is an essential primitive for a
    broad range of applications including intrusion
    detection for safety precautions, patient monitoring in
    hospitals, child and elder care at home, and so forth.
    Despite the prevalent signal feature Received Signal
    Strength (RSS), most robust and reliable solutions
    resort to a finer-grained channel descriptor at the
    physical layer, e.g., the Channel State Information
    (CSI) in the 802.11n standard. Among a large body of
    emerging techniques, however, few of them have explored
    the full potential of CSI for human detection.
    Moreover, space diversity supported by nowadays popular
    multiantenna systems are not investigated to a
    comparable extent as frequency diversity. In this
    article, we propose a novel scheme for device-free
    PAssive Detection of moving humans with dynamic Speed
    (PADS). Both full information (amplitude and phase) of
    CSI and space diversity across multiantennas in MIMO
    systems are exploited to extract and shape sensitive
    metrics for accuracy and robust target detection. We
    prototype PADS on commercial WiFi devices, and
    experiment results in different scenarios demonstrate
    that PADS achieves great performance improvement in
    spite of dynamic human movements.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {52},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Guo:2018:CSP,
  author = {Danlu Guo and Mohamed Hassan and Rodolfo Pellizzoni and Hiren Patel},
  title = {A Comparative Study of Predictable {DRAM} Controllers},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {53:1--53:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3158208},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Recently, the research community has introduced
    several predictable dynamic random-access memory (DRAM)
    controller designs that provide improved worst-case
    timing guarantees for real-time embedded systems. The
    proposed controllers significantly differ in terms of
    arbitration, configuration, and simulation environment,
    making it difficult to assess the contribution of each
    approach. To bridge this gap, this article provides the
    first comprehensive evaluation of state-of-the-art
    predictable DRAM controllers. We propose a
    categorization of available controllers, and introduce
    an analytical performance model based on worst-case
    latency. We then conduct an extensive evaluation for
    all state-of-the-art controllers based on a common
    simulation platform, and discuss findings and
    recommendations.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {53},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Mozaffari-Kermani:2018:ERE,
  author = {Mehran Mozaffari-Kermani and Reza Azarderakhsh and Ausmita Sarker and Amir Jalali},
  title = {Efficient and Reliable Error Detection Architectures of Hash-Counter-Hash Tweakable Enciphering Schemes},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {54:1--54:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3159173},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/hash.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Through pseudorandom permutation, tweakable
    enciphering schemes (TES) constitute block cipher modes
    of operation which perform length-preserving
    computations. The state-of-the-art research has focused
    on different aspects of TES, including implementations
    on hardware [field-programmable gate array (FPGA)/
    application-specific integrated circuit (ASIC)] and
    software (hard/soft-core microcontrollers) platforms,
    algorithmic security, and applicability to sensitive,
    security-constrained usage models. In this article, we
    propose efficient approaches for protecting such
    schemes against natural and malicious faults.
    Specifically, noting that intelligent attackers do not
    merely get confined to injecting multiple faults, one
    major benchmark for the proposed schemes is evaluation
    toward biased and burst fault models. We evaluate a
    variant of TES, i.e., the Hash-Counter-Hash scheme,
    which involves polynomial hashing as other variants are
    either similar or do not constitute finite field
    multiplication which, by far, is the most involved
    operation in TES. In addition, we benchmark the
    overhead and performance degradation on the ASIC
    platform. The results of our error injection
    simulations and ASIC implementations show the
    suitability of the proposed approaches for a wide range
    of applications including deeply embedded systems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {54},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
%%% The first author's family name is the compound surname
%%% ``M{\'e}ndez Real'', so it is entered in ``Last, First'' form to keep
%%% BibTeX from parsing ``M{\'e}ndez'' as a given name.  The citation
%%% key is left unchanged so that existing citations still resolve.
@Article{Real:2018:ADS,
author = "M{\'e}ndez Real, Maria and Philipp Wehner and Vianney
Lapotre and Diana G{\"o}hringer and Guy Gogniat",
title = "Application Deployment Strategies for Spatial
Isolation on Many-Core Accelerators",
journal = j-TECS,
volume = "17",
number = "2",
pages = "55:1--55:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3168383",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Current cache Side-Channel Attacks (SCAs)
countermeasures have not been designed for many-core
architectures and need to be revisited in order to be
practical for these new technologies. Spatial isolation
of resources for sensitive applications has been
proposed taking advantage of the large number of
resources offered by these architectures. This solution
avoids cache sharing with sensitive processes.
Consequently, their cache activity cannot be monitored
and cache SCAs cannot be performed. This work focuses
on the implementation of this technique in order to
minimize the induced performance overhead. Different
strategies for the management of isolated secure zones
are implemented and compared.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "55",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sui:2018:LOP,
author = "Yulei Sui and Xiaokang Fan and Hao Zhou and Jingling
Xue",
title = "Loop-Oriented Pointer Analysis for Automatic {SIMD}
Vectorization",
journal = j-TECS,
volume = "17",
number = "2",
pages = "56:1--56:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3168364",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:34 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Compiler-based vectorization represents a promising
solution to automatically generate code that makes
efficient use of modern CPUs with SIMD extensions. Two
main auto-vectorization techniques, superword-level
parallelism vectorization (SLP) and loop-level
vectorization (LLV), require precise dependence
analysis on arrays and structs to vectorize isomorphic
scalar instructions (in the case of SLP) and reduce
dynamic dependence checks at runtime (in the case of
LLV). The alias analyses used in modern vectorizing
compilers are either intra-procedural (without tracking
inter-procedural data-flows) or inter-procedural (by
using field-sensitive models, which are too imprecise
in handling arrays and structs). This article proposes
an inter-procedural Loop-oriented Pointer Analysis for
C, called Lpa, for analyzing arrays and structs to
support aggressive SLP and LLV optimizations
effectively. Unlike field-insensitive solutions that
pre-allocate objects for each memory allocation site,
our approach uses a lazy memory model to generate
access-based location sets based on how structs and
arrays are accessed. Lpa can precisely analyze arrays
and nested aggregate structures to enable SIMD
optimizations for large programs. By separating the
location set generation as an independent concern from
the rest of the pointer analysis, Lpa is designed so
that existing points-to resolution algorithms (e.g.,
flow-insensitive and flow-sensitive pointer analysis)
can be reused easily. We have implemented Lpa fully in
the LLVM compiler infrastructure (version 3.8.0). We
evaluate Lpa by considering SLP and LLV, the two
classic vectorization techniques, on a set of 20 C and
Fortran CPU2000/2006 benchmarks. For SLP, Lpa
outperforms LLVM's BasicAA and ScevAA by discovering
139 and 273 more vectorizable basic blocks,
respectively, resulting in the best speedup of 2.95\%
for 173.applu. For LLV, LLVM introduces totally 551 and
652 static bound checks under BasicAA and ScevAA,
respectively. In contrast, Lpa has reduced these static
checks to 220, with an average of 15.7 checks per
benchmark, resulting in the best speedup of 7.23\% for
177.mesa.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "56",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2018:TES,
  author = {Feng Li and Yanbing Yang and Zicheng Chi and Liya Zhao and Yaowen Yang and Jun Luo},
  title = {{Trinity}: Enabling Self-Sustaining {WSNs} Indoors with Energy-Free Sensing and Networking},
  journal = j-TECS,
  volume = {17},
  number = {2},
  pages = {57:1--57:??},
  month = apr,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3173039},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Whereas a lot of efforts have been put on energy
    conservation in wireless sensor networks (WSNs), the
    limited lifetime of these systems still hampers their
    practical deployments. This situation is further
    exacerbated indoors, as conventional energy harvesting
    (e.g., solar) may not always work. To enable long-lived
    indoor sensing, we report in this article a
    self-sustaining sensing system that draws energy from
    indoor environments, adapts its duty-cycle to the
    harvested energy, and pays back the environment by
    enhancing the awareness of the indoor microclimate
    through an ``energy-free'' sensing. First of all, given
    the pervasive operation of heating, ventilation, and
    air conditioning (HVAC) systems indoors, our system
    harvests energy from airflow introduced by the HVAC
    systems to power each sensor node. Secondly, as the
    harvested power is tiny, an extremely low but
    synchronous duty-cycle has to be applied whereas the
    system gets no energy surplus to support existing
    synchronization schemes. So, we design two
    complementary synchronization schemes that cost
    virtually no energy. Finally, we exploit the feature of
    our harvester to sense the airflow speed in an
    energy-free manner. To our knowledge, this is the first
    indoor wireless sensing system that encapsulates energy
    harvesting, network operating, and sensing all
    together.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {57},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Shukla:2018:EUE,
  author = {Sandeep K. Shukla},
  title = {Editorial: To Use or Not To? {Embedded} Systems for Voting},
  journal = j-TECS,
  volume = {17},
  number = {3},
  pages = {58:1--58:??},
  month = jun,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3206342},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:35 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {58},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Morse:2018:LAW,
  author = {Jeremy Morse and Steve Kerrison and Kerstin Eder},
  title = {On the Limitations of Analyzing Worst-Case Dynamic Energy of Processing},
  journal = j-TECS,
  volume = {17},
  number = {3},
  pages = {59:1--59:??},
  month = jun,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3173042},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:35 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article examines dynamic energy consumption
    caused by data during software execution on deeply
    embedded microprocessors, which can be significant on
    some devices. In worst-case energy consumption
    analysis, energy models are used to find the most
    costly execution path. Taking each instruction's
    worst-case energy produces a safe but overly
    pessimistic upper bound. Algorithms for safe and tight
    bounds would be desirable. We show that finding exact
    worst-case energy is NP-hard, and that tight bounds
    cannot be approximated with guaranteed safety. We
    conclude that any energy model targeting tightness must
    either sacrifice safety or accept overapproximation
    proportional to data-dependent energy.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {59},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Seo:2018:CIA,
  author = {Hwajeong Seo and Ilwoong Jeong and Jungkeun Lee and Woo-Hwan Kim},
  title = {Compact Implementations of {ARX}-Based Block Ciphers on {IoT} Processors},
  journal = j-TECS,
  volume = {17},
  number = {3},
  pages = {60:1--60:??},
  month = jun,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3173455},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:35 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In this article, we present implementations for
    Addition, Rotation, and eXclusive-or (ARX)-based block
    ciphers, including LEA and HIGHT, on IoT devices,
    including 8-bit AVR, 16-bit MSP, 32-bit ARM, and 32-bit
    ARM-NEON processors. We optimized 32-/8-bitwise ARX
    operations for LEA and HIGHT block ciphers by
    considering variations in word size, the number of
    general purpose registers, and the instruction set of
    the target IoT devices. Finally, we achieved the most
    compact implementations of LEA and HIGHT block ciphers.
    The implementations were fairly evaluated through the
    Fair Evaluation of Lightweight Cryptographic Systems
    framework, and implementations won the competitions in
    the first and the second rounds.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {60},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Hong:2018:ISP,
  author = {Ding-Yong Hong and Yu-Ping Liu and Sheng-Yu Fu and Jan-Jan Wu and Wei-Chung Hsu},
  title = {Improving {SIMD} Parallelism via Dynamic Binary Translation},
  journal = j-TECS,
  volume = {17},
  number = {3},
  pages = {61:1--61:??},
  month = jun,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3173456},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:35 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Recent trends in SIMD architecture have tended toward
    longer vector lengths, and more enhanced SIMD features
    have been introduced in newer vector instruction sets.
    However, legacy or proprietary applications compiled
    with short-SIMD ISA cannot benefit from the long-SIMD
    architecture that supports improved parallelism and
    enhanced vector primitives, resulting in only a small
    fraction of potential peak performance. This article
    presents a dynamic binary translation technique that
    enables short-SIMD binaries to exploit benefits of new
    SIMD architectures by rewriting short-SIMD loop code.
    We propose a general approach that translates loops
    consisting of short-SIMD instructions to
    machine-independent IR, conducts SIMD loop
    transformation/optimization at this IR level, and
    finally translates to long-SIMD instructions. Two
    solutions are presented to enforce SIMD load/store
    alignment, one for the problem caused by the binary
    translator's internal translation condition and one
    general approach using dynamic loop peeling
    optimization. Benchmark results show that average
    speedups of $ 1.51 \times $ and $ 2.48 \times $ are
    achieved for an ARM NEON to x86 AVX2 and x86 AVX-512
    loop transformation, respectively.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {61},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Zhang:2018:PEP,
  author = {Jiutian Zhang and Yuhang Liu and Haifeng Li and Xiaojing Zhu and Mingyu Chen},
  title = {{PTAT}: an Efficient and Precise Tool for Tracing and Profiling Detailed {TLB} Misses},
  journal = j-TECS,
  volume = {17},
  number = {3},
  pages = {62:1--62:??},
  month = jun,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3182174},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:35 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {As the memory access footprints of applications in
    areas like data analytics increase, the latency
    overhead of translation lookaside buffer (TLB) misses
    increases. Thus, the efficiency of TLB becomes
    increasingly critical for overall system performance.
    Analyzing TLB miss traces is useful for hardware
    architecture design and software application
    optimization. Utilizing cycle-accurate simulators or
    instrumentation tools is very time-consuming and/or
    inaccurate for tracing and profiling TLB misses. In
    this article, we propose an efficient and precise tool
    to collect and profile last-level TLB misses. This tool
    utilizes a novel software method called Page Table
    Access Tracing (PTAT), storing last-level page table
    entries of certain workload processes into a reserved
    uncached memory region. Therefore, each last-level TLB
    miss incurred by user process corresponds to one
    uncached page table access to main memory, which can be
    captured and recorded by a hardware memory bus monitor.
    The detected information is then dumped into offline
    storage. In this manner, full TLB miss traces are
    collected and can be analyzed flexibly. Compared to
    previous software-based methods, this method achieves
    higher performance. Experiments show that, compared
    with a state-of-the-art kernel instrumentation method
    (BadgerTrap), which lacks complete dumping trace
    function, the speedup is still up to 3.88-fold for
    memory-intensive benchmarks. Due to the improved
    efficiency and completeness of tracing, case studies
    validate that more flexible profiling can be conducted,
    which is of great significance for TLB performance
    optimization. The accuracy of PTAT is verified by both
    dedicated sequence and performance counters.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {62},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Hosseinabady:2018:DEM,
  author = {Mohammad Hosseinabady and Jose Luis Nunez-Yanez},
  title = {Dynamic Energy Management of {FPGA} Accelerators in Embedded Systems},
  journal = j-TECS,
  volume = {17},
  number = {3},
  pages = {63:1--63:??},
  month = jun,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3182172},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:35 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In this article, we investigate how to utilise an
    Field-Programmable Gate Array (FPGA) in an embedded
    system to save energy. For this purpose, we study the
    energy efficiency of a hybrid FPGA-CPU device that can
    switch task execution between hardware and software
    with a focus on periodic tasks. To increase the
    applicability of this task switching, we also consider
    the voltage and frequency scaling (VFS) applied to the
    FPGA to reduce the system energy consumption. We show
    that in some cases, if the task's period is higher than
    a specific level, the FPGA accelerator cannot reduce
    the energy consumption associated to the task and the
    software version is the most energy efficient option.
    We have applied the proposed techniques to a robot map
    creation algorithm as a case study which shows up to
    38\% energy reduction compared to the FPGA
    implementation. Overall, experimental results show up
    to 48\% energy reduction by applying the proposed
    techniques at runtime on 13 individual tasks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {63},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
%%% TECS 17(3), June 2018, article 64.
@article{Kim:2018:OND,
author = {Hyeonggyu Kim and Minho Ju and Soontae Kim},
title = {{OnNetwork+}: Network Delay-Aware Management for
Mobile Systems},
journal = j-TECS,
year = {2018},
month = jun,
volume = {17},
number = {3},
articleno = {64},
pages = {64:1--64:??},
doi = {https://doi.org/10.1145/3182171},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:35 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Network errors such as packet losses consume large
amounts of energy. We analyzed the reason for this
through measurements using the latest smartphones and
full-system simulation. We found that on packet losses
the smartphones maintain high frequencies for CPU
without doing useful work. To address this problem, we
propose a method for reducing the energy consumption by
lowering the performance level by exploiting a dynamic
voltage and frequency scaling mechanism when long
network delays are expected. According to our
experiments, our method reduces the total energy
consumption of web browsing on two different
smartphones by up to 10.0\% and 11.5\%, respectively.},
}
%%% TECS 17(3), June 2018, article 65.
@article{Tsoutsouras:2018:HDR,
author = {Vasileios Tsoutsouras and Iraklis Anagnostopoulos and
Dimosthenis Masouros and Dimitrios Soudris},
title = {A Hierarchical Distributed Runtime Resource Management
Scheme for {NoC}-Based Many-Cores},
journal = j-TECS,
year = {2018},
month = jun,
volume = {17},
number = {3},
articleno = {65},
pages = {65:1--65:??},
doi = {https://doi.org/10.1145/3182173},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:35 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {As technology constantly strengthens its presence in
all aspects of human life, computing systems integrate
a high number of processing cores, whereas applications
become more complex and greedy for computational
resources. Inevitably, this high increase in processing
elements combined with the unpredictable resource
requirements of executed applications at design time
impose new design constraints to resource management of
many-core systems, turning the distributed
functionality into a necessity. In this work, we
present a distributed runtime resource management
framework for many-core systems utilizing a
network-on-chip (NoC) infrastructure. Specifically, we
couple the concept of distributed management with
parallel applications by assigning different roles to
the available computing resources. The presented design
is based on the idea of local controllers and managers,
whereas an on-chip intercommunication scheme ensures
decision distribution. The evaluation of the proposed
framework was performed on an Intel Single-Chip Cloud
Computer, an actual NoC-based, many-core system.
Experimental results show that the proposed scheme
manages to allocate resources efficiently at runtime,
leading to gains of up to 30\% in application execution
latency compared to relevant state-of-the-art
distributed resource management frameworks.},
}
%%% TECS 17(3), June 2018, article 66.
@article{Seo:2018:CSI,
author = {Hwajeong Seo},
title = {Compact Software Implementation of Public-Key
Cryptography on {MSP430X}},
journal = j-TECS,
year = {2018},
month = jun,
volume = {17},
number = {3},
articleno = {66},
pages = {66:1--66:??},
doi = {https://doi.org/10.1145/3190855},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:35 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {On the low-end embedded processors, the
implementations of Elliptic Curve Cryptography (ECC)
are considered to be a challenging task due to the
limited computation power and storage of the low-end
embedded processors. Particularly, the multi-precision
multiplication and squaring operations are the most
expensive operations for ECC implementations. In order
to enhance the performance, many works presented
efficient multiplication and squaring routines on the
target devices. Recent works show that 128-bit security
level ECC is available within a second and this is
practically fast enough for IoT services. However,
previous approaches missed the other important storage
issues (i.e., program size, ROM). Considering that the
embedded processors only have a few KB ROM, we need to
pay attention to the compact ROM size with reasonable
performance. In this article, we present very compact
and generic implementations of multiplication and
squaring operations on the 16-bit MSP430X processors
for the ECC. The implementations utilize the new 32-bit
multiplier and advanced multiplication and squaring
routines. Since the proposed routines are generic, the
arbitrary length of operand is available with
high-speed and small code size. With proposed
multiplication and squaring routines, we implemented
Curve25519 on the MSP430X processors. The scalar
multiplication is performed within 6,666,895 clock
cycles and 4,054 bytes. Compared with previous works
based on the speed-optimized version, our
memory-efficient version reduces the code size by
59.8\%, sacrificing the execution timing by 20.5\%.},
}
%%% TECS 17(3), June 2018, article 67.  The fifth author's given name is
%%% written Jo{\~a}o (lowercase final o); a capital O after the accent
%%% control sequence is a case-mangling artifact, not part of the name.
@Article{Yassin:2018:AAC,
author = "Yahya H. Yassin and Francky Catthoor and Fabian
Kloosterman and Jyh-Jang Sun and Jo{\~a}o Couto and Per
Gunnar Kjeldsberg and Nick {Van Helleputte}",
title = "Algorithm\slash Architecture Co-optimisation Technique
for Automatic Data Reduction of Wireless Read-Out in
High-Density Electrode Arrays",
journal = j-TECS,
volume = "17",
number = "3",
pages = "67:1--67:??",
month = jun,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3190854",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:35 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "High-density electrode arrays used to read out neural
activity will soon surpass the limits of the amount of
data that can be transferred within reasonable energy
budgets. This is true for wired brain implants when the
required bandwidth becomes very high, and even more so
for untethered brain implants that require wireless
transmission of data. We propose an energy-efficient
spike data extraction solution for high-density
electrode arrays, capable of reducing the data to be
transferred by over 85\%. We combine temporal and
spatial spike data analysis with low implementation
complexity, where amplitude thresholds are used to
detect spikes and the spatial location of the
electrodes is used to extract potentially useful
sub-threshold data on neighboring electrodes. We tested
our method against a state-of-the-art spike detection
algorithm, with prohibitively high implementation
complexity, and found that the majority of spikes are
extracted reliably. We obtain further improved quality
results when ignoring very small spikes below 30\% of
the voltage thresholds, resulting in 91\% accuracy. Our
approach uses digital logic and is therefore scalable
with an increasing number of electrodes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 17(3), June 2018, article 68.
@article{Hammari:2018:RPD,
author = {Elena Hammari and Per Gunnar Kjeldsberg and Francky
Catthoor},
title = {Runtime Precomputation of Data-Dependent Parameters in
Embedded Systems},
journal = j-TECS,
year = {2018},
month = jun,
volume = {17},
number = {3},
articleno = {68},
pages = {68:1--68:??},
doi = {https://doi.org/10.1145/3191311},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:35 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {In many modern embedded systems, the available
resources (e.g., CPU clock cycles, memory, and energy)
are consumed nonuniformly while the system is under
exploitation. Typically, the resource requirements in
the system change with different input data that the
system process. These data trigger different parts of
the embedded software, resulting in different
operations executed that require different hardware
platform resources to be used. A significant research
effort has been dedicated to develop mechanisms for
runtime resource management (e.g., branch prediction
for pipelined processors, prefetching of data from main
memory to cache, and scenario-based design
methodologies). All these techniques rely on the
availability of information at runtime about upcoming
changes in resource requirements. In this article, we
propose a method for detecting upcoming resource
changes based on preliminary calculation of software
variables that have the most dynamic impact on resource
requirements in the system. We apply the method on a
modified real-life biomedical algorithm with real input
data and estimate a 40\% energy reduction as compared
to static DVFS scheduling. Comparing to dynamic DVFS
scheduling, an 18\% energy reduction is demonstrated.},
}
%%% TECS 17(3), June 2018, article 69.
@article{Yoon:2018:SAF,
author = {Su-Kyung Yoon and Jitae Yun and Jung-Geun Kim and
Shin-Dug Kim},
title = {Self-Adaptive Filtering Algorithm with {PCM}-Based
Memory Storage System},
journal = j-TECS,
year = {2018},
month = jun,
volume = {17},
number = {3},
articleno = {69},
pages = {69:1--69:??},
doi = {https://doi.org/10.1145/3190856},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:35 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {This article proposes a new phase change memory- (PCM)
based memory storage architecture with associated
self-adaptive data filtering for various embedded
devices to support energy efficiency as well as high
computing power. In this approach, PCM-based memory
storage can be used as working memory and mass storage
layers simultaneously, and a self-adaptive data
filtering module composed of small DRAM dual buffers
was designed to improve unfavorable PCM features, such
as asymmetric read/write access latencies and limited
endurance and enhance spatial/temporal localities. In
particular, the self-adaptive data filtering algorithm
enhances data reusability by screening potentially high
reusable data and predicting adequate lifetime of those
data depending on current victim time decision value.
We also propose the possibility that a small amount of
DRAM buffer is embedded into mobile processors, keeping
this as small as possible for cost effectiveness and
energy efficiency. Experimental results show that by
exploiting a small amount of DRAM space for dual
buffers and using the self-adaptive filtering algorithm
to manage them, the proposed system can reduce
execution time by a factor of 1.9 compared to the
unified conventional model with same the DRAM capacity
and can be considered comparable to 1.5$ \times $ DRAM
capacity.},
}
%%% TECS 17(3), June 2018, article 70.
@article{Amanollahi:2018:ERD,
author = {Saba Amanollahi and Ghassem Jaberipur},
title = {Extended Redundant-Digit Instruction Set for
Energy-Efficient Processors},
journal = j-TECS,
year = {2018},
month = jun,
volume = {17},
number = {3},
articleno = {70},
pages = {70:1--70:??},
doi = {https://doi.org/10.1145/3202664},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:35 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {The impact of extending the instruction set
architecture (ISA) of a conventional binary processor
by a set of redundant-digit arithmetic instructions is
studied. Selected binary arithmetic instructions within
a given code sequence are replaced with appropriate
redundant-digit ones. The selection criteria is so
enforced to lead to overall reduction of execution
energy and energy-delay product (EDP). A special branch
and bound algorithm is devised to modify the dataflow
graph (DFG) to a new one that takes advantage of the
extended redundant-digit instruction set. The DFG is
obtained, via an in-house tool, from the intermediate
code representation that is normally produced by the
utilized compiler. The required redundant-digit
arithmetic operations (including a multiplier, a
multiply accumulator, and three- to four-operand
redundant-digit adders specially designed for this
work) have been synthesized on 45nm NanGate technology
by a Synopsys Design Compiler. To evaluate the impact
of the proposed ISA augmentation on actual code
execution, the simulation and evaluation platform of
our choice is an MIPS processor whose ISA is extended
by the proposed redundant-digit instructions. Several
digital signal processing benchmarks are utilized as
the source of the baseline MIPS codes, which are
converted (via the aforementioned algorithm) to the
equivalent mixed binary/redundant-digit codes. Our
experiments, as such, show up to 26\% energy and 44\%
EDP savings.},
}
%%% TECS 17(3), June 2018, article 71.  The first author's given name is
%%% written Bj{\"o}rn: only the initial letter is capitalized.
@Article{Andersson:2018:SAT,
author = "Bj{\"o}rn Andersson and Hyoseung Kim and Dionisio {De
Niz} and Mark Klein and Ragunathan (Raj) Rajkumar and
John Lehoczky",
title = "Schedulability Analysis of Tasks with
Corunner-Dependent Execution Times",
journal = j-TECS,
volume = "17",
number = "3",
pages = "71:1--71:??",
month = jun,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3203407",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:35 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Consider fixed-priority preemptive partitioned
scheduling of constrained-deadline sporadic tasks on a
multiprocessor. A task generates a sequence of jobs and
each job has a deadline that must be met. Assume tasks
have Corunner-dependent execution times; i.e., the
execution time of a job J depends on the set of jobs
that happen to execute (on other processors) at
instants when J executes. We present a model that
describes Corunner-dependent execution times. For this
model, we show that exact schedulability testing is
co-NP-hard in the strong sense. Facing this complexity,
we present a sufficient schedulability test, which has
pseudo-polynomial-time complexity if the number of
processors is fixed. We ran experiments with synthetic
software benchmarks on a quad-core Intel multicore
processor with the Linux/RK operating system and found
that for each task, its maximum measured response time
was bounded by the upper bound computed by our
theory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "71",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 17(3), June 2018, article 72.  Authors are listed given name
%%% first (Kelefouras, Keramidas, and Voros are the family names), so
%%% that BibTeX sorts and abbreviates them correctly.  The citation key
%%% still begins with Vasilios because existing documents may cite it;
%%% do not rename it.
@Article{Vasilios:2018:CSC,
author = "Vasilios Kelefouras and Georgios Keramidas and Nikolaos
Voros",
title = "Combining Software Cache Partitioning and Loop Tiling
for Effective Shared Cache Management",
journal = j-TECS,
volume = "17",
number = "3",
pages = "72:1--72:??",
month = jun,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3202663",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:35 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "One of the biggest challenges in multicore platforms
is shared cache management, especially for
data-dominant applications. Two commonly used
approaches for increasing shared cache utilization are
cache partitioning and loop tiling. However,
state-of-the-art compilers lack efficient cache
partitioning and loop tiling methods for two reasons.
First, cache partitioning and loop tiling are strongly
coupled together, and thus addressing them separately
is simply not effective. Second, cache partitioning and
loop tiling must be tailored to the target shared cache
architecture details and the memory characteristics of
the corunning workloads. To the best of our knowledge,
this is the first time that a methodology provides (1)
a theoretical foundation in the above-mentioned cache
management mechanisms and (2) a unified framework to
orchestrate these two mechanisms in tandem (not
separately). Our approach manages to lower the number
of main memory accesses by an order of magnitude
keeping at the same time the number of
arithmetic/addressing instructions to a minimal level.
We motivate this work by showcasing that cache
partitioning, loop tiling, data array layouts, shared
cache architecture details (i.e., cache size and
associativity), and the memory reuse patterns of the
executing tasks must be addressed together as one
problem, when a (near)-optimal solution is requested.
To this end, we present a search space exploration
analysis where our proposal is able to offer a vast
deduction in the required search space.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "72",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 17(4), August 2018, article 73 (editorial; no abstract recorded).
@article{Shukla:2018:EEC,
author = {Sandeep K. Shukla},
title = {Editorial: Early Career Researchers in Embedded
Computing},
journal = j-TECS,
year = {2018},
month = aug,
volume = {17},
number = {4},
articleno = {73},
pages = {73:1--73:??},
doi = {https://doi.org/10.1145/3241724},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:41 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
}
%%% TECS 17(4), August 2018, article 74.
@article{Vatanparvar:2018:DAB,
author = {Korosh Vatanparvar and Mohammad Abdullah {Al
Faruque}},
title = {Design and Analysis of Battery-Aware Automotive
Climate Control for Electric Vehicles},
journal = j-TECS,
year = {2018},
month = aug,
volume = {17},
number = {4},
articleno = {74},
pages = {74:1--74:??},
doi = {https://doi.org/10.1145/3203408},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:41 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Electric Vehicles (EV) as a zero-emission means of
transportation encounter challenges in battery design
that cause a range anxieties for the drivers. Besides
the electric motor, the Heating, Ventilation, and Air
Conditioning (HVAC) system is another major contributor
to the power consumption that may influence the EV
battery lifetime and driving range. In the
state-of-the-art methodologies for battery management
systems, the battery performance is monitored and
improved. While in the automotive climate control, the
passenger's thermal comfort is the main objective.
Hence, the influence of the HVAC power on the battery
behavior for the purpose of jointly optimized battery
management and climate control has not been considered.
In this article, we propose an automotive climate
control methodology that is aware of the battery
behavior and performance, while maintaining the
passenger's thermal comfort. In our methodology,
battery parameters and cabin temperature are modeled
and estimated, and the HVAC utilization is optimized
and adjusted with respect to the electric motor and
HVAC power requests. Therefore, the battery stress
reduces, while the cabin temperature is maintained by
predicting and optimizing the system states in the
near-future. We have implemented our methodology and
compared its performance to the state-of-the-art in
terms of battery lifetime improvement and energy
consumption reduction. We have also conducted
experiments and analyses to explore multiple control
window sizes, drive profiles, ambient temperatures, and
modeling error rates in the methodology. It is shown
that our battery-aware climate control can extend the
battery lifetime by up to 13.2\% and reduce the energy
consumption by up to 14.4\%.},
}
%%% TECS 17(4), August 2018, article 75.
@article{Pan:2018:MAC,
author = {Wen Pan and Tao Xie},
title = {A Mirroring-Assisted Channel-{RAID5} {SSD} for Mobile
Applications},
journal = j-TECS,
year = {2018},
month = aug,
volume = {17},
number = {4},
articleno = {75},
pages = {75:1--75:??},
doi = {https://doi.org/10.1145/3209625},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:41 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Simply applying an existing redundant array of
independent disks (RAID) technique to enhance data
reliability within a single solid-state drive for
safety-critical mobile applications significantly
degrades performance. In this article, we first propose
a new RAID5 architecture called channel-RAID5 with
mirroring (CR5M) to alleviate the performance
degradation problem. Next, an associated data
reconstruction strategy called mirroring-assisted
channel-level reconstruction (MCR) is developed to
further shrink the window of vulnerability.
Experimental results demonstrate that compared with
channel-RAID5 (CR5), CR5M improves performance up to
40.2\%. Compared with disk-oriented reconstruction, a
traditional data reconstruction scheme, MCR on average
improves data recovery speed by 7.5\% while delivering
a similar performance during reconstruction.},
}
%%% TECS 17(4), August 2018, article 76.
@article{Omar:2018:DRH,
author = {Hamza Omar and Qingchuan Shi and Masab Ahmad and Halit
Dogan and Omer Khan},
title = {Declarative Resilience: a Holistic Soft-Error
Resilient Multicore Architecture that Trades off
Program Accuracy for Efficiency},
journal = j-TECS,
year = {2018},
month = aug,
volume = {17},
number = {4},
articleno = {76},
pages = {76:1--76:??},
doi = {https://doi.org/10.1145/3210559},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:41 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {To protect multicores from soft-error perturbations,
research has explored various resiliency schemes that
provide high soft-error coverage. However, these
schemes incur high performance and energy overheads. We
observe that not all soft-error perturbations affect
program correctness, and some soft-errors only affect
program accuracy, i.e., the program completes with
certain acceptable deviations from error free outcome.
Thus, it is practical to improve processor efficiency
by trading off resiliency overheads with program
accuracy. This article proposes the idea of declarative
resilience that selectively applies strong resiliency
schemes for code regions that are crucial for program
correctness (crucial code) and lightweight resiliency
for code regions that are susceptible to program
accuracy deviations as a result of soft-errors
(non-crucial code). At the application level, crucial
and non-crucial code is identified based on its impact
on the program outcome. A cross-layer architecture
enables efficient resilience along with holistic
soft-error coverage. Only program accuracy is
compromised in the worst-case scenario of a soft-error
strike during non-crucial code execution. For a set of
machine-learning and graph analytic benchmarks,
declarative resilience reduces performance overhead
over a state-of-the-art system that applies strong
resiliency for all program code regions from $ \approx
1.43 \times $ to $ \approx 1.2 \times $.},
}
%%% TECS 17(4), August 2018, article 77.
@article{Wang:2018:SLL,
author = {Guan Wang and Chuanqi Zang and Lei Ju and Mengying
Zhao and Xiaojun Cai and Zhiping Jia},
title = {Shared Last-Level Cache Management and Memory
Scheduling for {GPGPUs} with Hybrid Main Memory},
journal = j-TECS,
year = {2018},
month = aug,
volume = {17},
number = {4},
articleno = {77},
pages = {77:1--77:??},
doi = {https://doi.org/10.1145/3230643},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:41 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Memory intensive workloads become increasingly popular
on general purpose graphics processing units (GPGPUs),
and impose great challenges on the GPGPU memory
subsystem design. On the other hand, with the recent
development of non-volatile memory (NVM) technologies,
hybrid memory combining both DRAM and NVM achieves high
performance, low power, and high density
simultaneously, which provides a promising main memory
design for GPGPUs. In this article, we explore the
shared last-level cache management for GPGPUs with
consideration of the underlying hybrid main memory. To
improve the overall memory subsystem performance, we
exploit the characteristics of both the asymmetric
read/write latency of the hybrid main memory
architecture, as well as the memory coalescing feature
of GPGPUs. In particular, to reduce the average cost of
L2 cache misses, we prioritize cache blocks from DRAM
or NVM based on observations that operations to NVM
part of main memory have a large impact on the system
performance. Furthermore, the cache management scheme
also integrates the GPU memory coalescing and cache
bypassing techniques to improve the overall system
performance. To minimize the impact of memory
divergence behaviors among simultaneously executed
groups of threads, we propose a hybrid main memory and
warp aware memory scheduling mechanism for GPGPUs.
Experimental results show that in the context of a
hybrid main memory system, our proposed L2 cache
management policy and memory scheduling mechanism
improve performance by 15.69\% on average for memory
intensive benchmarks, whereas the maximum gain can be
up to 29\% and achieve an average memory subsystem
energy reduction of 21.27\%.},
}
%%% TECS 17(4), August 2018, article 78.
@article{Liang:2018:DFM,
author = {Xiaoxuan Liang and Zhangqin Huang and Shengqi Yang and
Lanxin Qiu},
title = {Device-Free Motion \& Trajectory Detection via
{RFID}},
journal = j-TECS,
year = {2018},
month = aug,
volume = {17},
number = {4},
articleno = {78},
pages = {78:1--78:??},
doi = {https://doi.org/10.1145/3230644},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:41 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Compared with traditional methods that employ inertial
sensors or wireless sensors, device-free approaches do
not require that people carry devices, and they are
considered a useful technique for indoor navigation and
posture recognition. However, few existing methods can
detect the trajectory and movements of humans at the
same time. In this study, we propose a scheme called
PADAR for addressing these two problems simultaneously
by using passive radio frequency identification (RFID)
tags but without attaching them to the human body. The
idea is based on the principle of radio tomographic
imaging, where the variance in a tag's backscattered
radio frequency signal strength is influenced by human
movement. We integrated a commodity off-the-shelf RFID
reader with a two-dimensional phased array antenna and
a matrix of passive tags to evaluate the performance of
our scheme. We conducted experiments in a simulated
indoor environment. The experimental results showed
that PADAR achieved an accuracy of over 70\%.},
}
%%% TECS 17(4), August 2018, article 79.
@article{Ji:2018:ACP,
author = {Kecheng Ji and Ming Ling and Longxing Shi and Jianping
Pan},
title = {An Analytical Cache Performance Evaluation Framework
for Embedded Out-of-Order Processors Using Software
Characteristics},
journal = j-TECS,
year = {2018},
month = aug,
volume = {17},
number = {4},
articleno = {79},
pages = {79:1--79:??},
doi = {https://doi.org/10.1145/3233182},
issn = {1539-9087 (print), 1558-3465 (electronic)},
issn-l = {1539-9087},
coden = {????},
ajournal = {ACM Trans. Embed. Comput. Syst.},
fjournal = {ACM Transactions on Embedded Computing Systems},
journal-url = {https://dl.acm.org/loi/tecs},
bibdate = {Thu Oct 17 18:16:41 MDT 2019},
bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
acknowledgement = ack-nhfb,
abstract = {Utilizing analytical models to evaluate proposals or
provide guidance in high-level architecture decisions
is been becoming more and more attractive. A certain
number of methods have emerged regarding cache
behaviors and quantified insights in the last decade,
such as the stack distance theory and the memory level
parallelism (MLP) estimations. However, prior research
normally oversimplified the factors that need to be
considered in out-of-order processors, such as the
effects triggered by reordered memory instructions, and
multiple dependences among memory instructions, along
with the merged accesses in the same MSHR entry. These
ignored influences actually result in low and unstable
precisions of recent analytical models. By quantifying
the aforementioned effects, this article proposes a
cache performance evaluation framework equipped with
three analytical models, which can more accurately
predict cache misses, MLPs, and the average cache miss
service time, respectively. Similar to prior studies,
these analytical models are all fed with profiled
software characteristics in which case the architecture
evaluation process can be accelerated significantly
when compared with cycle-accurate simulations. We
evaluate the accuracy of proposed models compared with
gem5 cycle-accurate simulations with 16 benchmarks
chosen from Mobybench Suite 2.0, Mibench 1.0, and
Mediabench II. The average root mean square errors for
predicting cache misses, MLPs, and the average cache
miss service time are around 4\%, 5\%, and 8\%,
respectively. Meanwhile, the average error of
predicting the stall time due to cache misses by our
framework is as low as 8\%. The whole cache performance
estimation can be sped by about 15 times versus gem5
cycle-accurate simulations and 4 times when compared
with recent studies. Furthermore, we have shown and
studied the insights between different performance
metrics and the reorder buffer sizes by using our
models. As an application case of the framework, we
also demonstrate how to use our framework combined with
McPAT to find out Pareto optimal configurations for
cache design space explorations.},
}
@Article{Ara:2018:SAM,
  author = {Hadi Alizadeh Ara and Amir Behrouzian and Martijn
    Hendriks and Marc Geilen and Dip Goswami and Twan
    Basten},
  title = {Scalable Analysis for Multi-Scale Dataflow Models},
  journal = j-TECS,
  volume = {17},
  number = {4},
  pages = {80:1--80:??},
  month = aug,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3233183},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:41 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Multi-scale dataflow models have actors acting at
    multiple granularity levels, e.g., a dataflow model of
    a video processing application with operations on
    frame, line, and pixel level. The state of the art
    timing analysis methods for both static and dynamic
    dataflow types aggregate the behaviours across all
    granularity levels into one, often large iteration,
    which is repeated without exploiting the structure
    within such an iteration. This poses scalability issues
    to dataflow analysis, because behaviour of the large
    iteration is analysed by some form of simulation that
    involves a large number of actor firings. We take a
    fresh perspective of what is happening inside the large
    iteration. We take advantage of the fact that the
    iteration is a sequence of smaller behaviours, each
    captured in a scenario, that are typically repeated
    many times. We use the (max,+) linear model of dataflow
    to represent each of the scenarios with a matrix. This
    allows a compositional worst-case throughput analysis
    of the repeated scenarios by raising the matrices to
    the power of the number of repetitions, which scales
    logarithmically with the number of repetitions, whereas
    the existing throughput analysis scales linearly. We
    moreover provide the first exact worst-case latency
    analysis for scenario-aware dataflow. This
    compositional latency analysis also scales
    logarithmically when applied to multi-scale dataflow
    models. We apply our new throughput and latency
    analysis to several realistic applications. The results
    confirm that our approach provides a fast and accurate
    analysis.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {80},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Altawy:2018:SLT,
author = "Riham Altawy and Raghvendra Rohit and Morgan He and
Kalikinkar Mandal and Gangqiang Yang and Guang Gong",
title = "{sLiSCP-light}: Towards Hardware Optimized
Sponge-specific Cryptographic Permutations",
journal = j-TECS,
volume = "17",
number = "4",
pages = "81:1--81:??",
month = aug,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3233245",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The emerging areas in which highly resource
constrained devices are interacting wirelessly to
accomplish tasks have led manufacturers to embed
communication systems in them. Tiny low-end devices
such as sensor networks nodes and Radio Frequency
Identification (RFID) tags are of particular importance
due to their vulnerability to security attacks, which
makes protecting their communication privacy and
authenticity an essential matter. In this work, we
present a lightweight do-it-all cryptographic design
that offers the basic underlying functionalities to
secure embedded communication systems in tiny devices.
Specifically, we revisit the design approach of the
sLiSCP family of lightweight cryptographic
permutations, which was proposed in SAC 2017. sLiSCP is
designed to be used in a unified duplex sponge
construction to provide minimal overhead for multiple
cryptographic functionalities within one hardware
design. The design of sLiSCP follows a 4-subblock
Type-2 Generalized Feistel-like Structure (GFS) with
unkeyed round-reduced Simeck as the round function,
which are extremely efficient building blocks in terms
of their hardware area requirements. In sLiSCP-light,
we tweak the GFS design and turn it into an elegant
Partial Substitution-Permutation Network construction,
which further reduces the hardware areas of the sLiSCP
permutations by around 16\% of their original values.
The new design also enhances the bit diffusion and
algebraic properties of the permutations and enables us
to reduce the number of steps, thus achieving a better
throughput in both the hashing and authentication
modes. We perform a thorough security analysis of the
new design with respect to its diffusion, differential
and linear, and algebraic properties. For
sLiSCP-light-192, we report parallel implementation
hardware areas of 1,820 (respectively, 1,892) GE in CMOS
65 nm (respectively, 130 nm) ASIC. The areas for
sLiSCP-light-256 are 2,397 and 2,500 GE in CMOS 65 nm
and 130 nm ASIC, respectively. Overall, the unified
duplex sponge mode of sLiSCP-light-192, which provides
(authenticated) encryption and hashing functionalities,
satisfies the area (1,958 GE), power (3.97 $ \mu $W),
and throughput (44.4 kbps) requirements of passive RFID
tags.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "81",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2018:ENA,
  author = {Sandeep K. Shukla},
  title = {Editorial: Need for Artifact Verified Articles in
    {{\booktitle{ACM Transactions}}}},
  journal = j-TECS,
  volume = {17},
  number = {5},
  pages = {82:1--82:??},
  month = nov,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3282437},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:41 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3282437},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {82},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Kalayappan:2018:PAH,
  author = {Rajshekar Kalayappan and Smruti R. Sarangi},
  title = {Providing Accountability in Heterogeneous
    Systems-on-Chip},
  journal = j-TECS,
  volume = {17},
  number = {5},
  pages = {83:1--83:??},
  month = nov,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3241048},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:41 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3241048},
  abstract = {When modern systems-on-chip (SoCs), containing designs
    from different organizations, miscompute or
    underperform in the field, discerning the responsible
    component is a non-trivial task. A perfectly
    accountable system is one in which the on-chip
    component at fault is always unambiguously detected.
    The achievement of accountability can be greatly aided
    by the collection of runtime information that captures
    the events in the system that led to the error. Such
    information collection must be fair and impartial to
    all parties. In this article, we prove that logging
    messages communicated between components from different
    organizations is sufficient to provide accountability,
    provided the logs are authentic. We then construct a
    solution based on this premise, with an on-chip trusted
    auditing system to authenticate the logs. We present a
    thorough design of the auditing system, and demonstrate
    that its performance overhead is a mere 0.49\%, and its
    area overhead is a mere 0.194\% (in a heterogeneous 48
    core, 400 mm$^2$ chip). We also demonstrate the
    viability of this solution using three representative
    bugs found in popular commercial SoCs.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {83},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Bhuiyan:2018:EER,
  author = {Ashikahmed Bhuiyan and Zhishan Guo and Abusayeed
    Saifullah and Nan Guan and Haoyi Xiong},
  title = {Energy-Efficient Real-Time Scheduling of {DAG} Tasks},
  journal = j-TECS,
  volume = {17},
  number = {5},
  pages = {84:1--84:??},
  month = nov,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3241049},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:41 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3241049},
  abstract = {This work studies energy-aware real-time scheduling of
    a set of sporadic Directed Acyclic Graph (DAG) tasks
    with implicit deadlines. While meeting all real-time
    constraints, we try to identify the best task
    allocation and execution pattern such that the average
    power consumption of the whole platform is minimized.
    To our knowledge, this is the first work that addresses
    the power consumption issue in scheduling multiple DAG
    tasks on multi-cores and allows intra-task processor
    sharing. First, we adapt the decomposition-based
    framework for federated scheduling and propose an
    energy-sub-optimal scheduler. Then, we derive an
    approximation algorithm to identify processors to be
    merged together for further improvements in
    energy-efficiency. The effectiveness of the proposed
    approach is evaluated both theoretically via
    approximation ratio bounds and also experimentally
    through simulation study. Experimental results on
    randomly generated workloads show that our algorithms
    achieve an energy saving of 60\% to 68\% compared to
    existing DAG task schedulers.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {84},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Wei:2018:SAE,
  author = {Yi-Hung Wei and Quan Leng and Wei-Ju Chen and Aloysius
    K. Mok and Song Han},
  title = {Schedule Adaptation for Ensuring Reliability in
    {RT-WiFi}-Based Networked Embedded Systems},
  journal = j-TECS,
  volume = {17},
  number = {5},
  pages = {85:1--85:??},
  month = nov,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3236011},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:41 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3236011},
  abstract = {With the ever-growing interests in applying wireless
    technologies for networked embedded systems to serve as
    the communication fabric, many real-time wireless
    technologies have been recently developed to support
    time-critical sensing and control applications. We
    proposed in previous work the RT-WiFi protocol that
    provides real-time high-speed predictable data delivery
    and enables designs to meet time-critical industrial
    needs. However, without explicit reliability
    enforcement mechanisms, our previous RT-WiFi design is
    either subject to uncontrolled packet loss due to noise
    and other interferences or may suffer from inefficient
    communication channel usage. In this article, we
    explicitly consider interference from both Wi-Fi and
    non-Wi-Fi based interference sources and propose two
    sets of effective solutions for reliable data
    transmissions in RT-WiFi-based networked embedded
    systems. To improve reliability against general
    non-Wi-Fi based interference, based on rate adaptation
    and retransmission techniques, we present an optimal
    real-time rate adaption algorithm together with a
    communication link scheduler that has low network
    management overhead. A novel technique called
    overbooking is introduced to further improve the
    schedulability of the communication link scheduler
    while maintaining the required communication
    reliability. For Wi-Fi-based interference, we present
    mechanisms that utilize virtual carrier sensing to
    provide reliable data transmission while co-existing
    with regular Wi-Fi networks. We have implemented the
    proposed algorithms in the RT-WiFi network management
    framework and demonstrated the system performance with
    a series of experiments.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {85},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Sotiriou-Xanthopoulos:2018:OBV,
  author = {Efstathios Sotiriou-Xanthopoulos and Leonard Masing
    and Sotirios Xydis and Kostas Siozios and J{\"u}rgen
    Becker and Dimitrios Soudris},
  title = {{OpenCL}-based Virtual Prototyping and Simulation of
    Many-Accelerator Architectures},
  journal = j-TECS,
  volume = {17},
  number = {5},
  pages = {86:1--86:??},
  month = nov,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3242179},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:41 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib;
    https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3242179},
  abstract = {Heterogeneous architectures featuring multiple
    hardware accelerators have been proposed as a promising
    solution for meeting the ever-increasing performance
    and power requirements of embedded systems. However,
    the existence of numerous design parameters may result
    in different architectural schemes and thus in extra
    design effort. To address this issue, OpenCL-based
    frameworks have been recently utilized for FPGA
    programming, to enable the portability of a source code
    to multiple architectures. However, such OpenCL
    frameworks focus on RTL design, thus not enabling rapid
    prototyping and abstracted modeling of complex systems.
    Virtual Prototyping aims to overcome this problem by
    enabling the system modeling in higher abstraction
    levels. This article combines the benefits of OpenCL
    and Virtual Prototyping, by proposing an OpenCL-based
    prototyping framework for data-parallel
    many-accelerator systems, which (a) creates a SystemC
    Virtual Platform from OpenCL, (b) provides a
    co-simulation environment for the host and the Virtual
    Platform, (c) offers memory and interconnection models
    for parallel data processing, and (d) enables the
    system evaluation with alternative real number
    representations (e.g., fixed-point or 16-bit
    floating-point).},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {86},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Sababha:2018:RBF,
  author = {Belal H. Sababha and Yazan A. Alqudah},
  title = {A Reconfiguration-Based Fault-Tolerant Anti-Lock
    Brake-by-Wire System},
  journal = j-TECS,
  volume = {17},
  number = {5},
  pages = {87:1--87:??},
  month = nov,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3242178},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:41 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3242178},
  abstract = {Anti-Lock Braking Systems (ABS) and Brake-by-Wire
    Systems (BBW) are safety-critical applications by
    nature. Such systems are required to demonstrate high
    degrees of dependability. Fault-tolerance is the
    primary means to achieve dependability at runtime and
    has been an active research area for decades.
    Fault-tolerance is usually achieved in traditional
    embedded computing systems through redundancy and
    voting methods. In such systems, hardware units,
    actuators, sensors, and communication networks are
    replicated where special voters vote against faulty
    units. In addition to traditional hardware and software
    redundancy, hybrid and reconfiguration-based approaches
    to fault-tolerance are evolving. In this article, we
    present a reconfiguration-based fault-tolerant approach
    to achieve high dependability in ABS BBW braking
    systems. The proposed architecture makes use of other
    components of less safety-critical systems to maintain
    high dependability in the more safety-critical systems.
    This is achieved by migrating safety-critical software
    tasks from embedded computer hardware that runs into a
    malfunction to other embedded computing hardware
    running less-critical software tasks. Or by using a
    different configuration in terms of the used speed
    sensors and type of ABS. The proposed architecture is
    on average 20\% more reliable than conventional ABS
    architectures assuming equal reliabilities of different
    components.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {87},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Jin:2018:PAR,
author = "Xi Jin and Nan Guan and Changqing Xia and Jintao Wang
and Peng Zeng",
title = "Packet Aggregation Real-Time Scheduling for
Large-Scale {WIA-PA} Industrial Wireless Sensor
Networks",
journal = j-TECS,
volume = "17",
number = "5",
pages = "88:1--88:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3266228",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3266228",
abstract = "The IEC standard WIA-PA is a communication protocol
for industrial wireless sensor networks. Its special
features, including a hierarchical topology, hybrid
centralized-distributed management and packet
aggregation make it suitable for large-scale industrial
wireless sensor networks. Industrial systems place
large real-time requirements on wireless sensor
networks. However, the WIA-PA standard does not specify
the transmission methods, which are vital to the
real-time performance of wireless networks, and little
work has been done to address this problem. In this
article, we propose a real-time aggregation scheduling
method for WIA-PA networks. First, to satisfy the
real-time constraints on dataflows, we propose a method
that combines the real-time theory with the classical
bin-packing method to aggregate original packets into
the minimum number of aggregated packets. The
simulation results indicate that our method outperforms
the traditional bin-packing method, aggregating up to
35\% fewer packets, and improves the real-time
performance by up to 10\%. Second, to make it possible
to solve the scheduling problem of WIA-PA networks
using the classical scheduling algorithms, we transform
the ragged time slots of WIA-PA networks to a universal
model. In the simulation, a large number of WIA-PA
networks are randomly generated to evaluate the
performances of several real-time scheduling
algorithms. By comparing the results, we obtain that
the earliest deadline first real-time scheduling
algorithm is the preferred method for WIA-PA
networks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Weichslgartner:2018:DTR,
author = "Andreas Weichslgartner and Stefan Wildermann and
Deepak Gangadharan and Michael Gla{\ss} and J{\"u}rgen
Teich",
title = "A Design-Time\slash Run-Time Application Mapping
Methodology for Predictable Execution Time in
{MPSoCs}",
journal = j-TECS,
volume = "17",
number = "5",
pages = "89:1--89:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3274665",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:41 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3274665",
abstract = "Executing multiple applications on a single MPSoC
brings the major challenge of satisfying multiple
quality requirements regarding real-time, energy, and
so on. Hybrid application mapping denotes the
combination of design-time analysis with run-time
application mapping. In this article, we present such a
methodology, which comprises a design space exploration
coupled with a formal performance analysis. This
results in several resource reservation configurations,
optimized for multiple objectives, with verified
real-time guarantees for each individual application.
The Pareto-optimal configurations are handed over to
run-time management, which searches for a suitable
mapping according to this information. To provide any
real-time guarantees, the performance analysis needs to
be composable and the influence of the applications on
each other has to be bounded. We achieve this either by
spatial or a novel temporal isolation for tasks and by
exploiting composable networks-on-chip (NoCs). With the
proposed temporal isolation, tasks of different
applications can be mapped to the same resource, while,
with spatial isolation, one computing resource can be
exclusively used by only one application. The
experiments reveal that the success rate in finding
feasible application mappings can be increased by the
proposed temporal isolation by up to 30\% and energy
consumption can be reduced compared to spatial
isolation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "89",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hassan:2018:EID,
  author = {Mohamed Hassan and Anirudh M. Kaushik and Hiren
    Patel},
  title = {Exposing Implementation Details of Embedded {DRAM}
    Memory Controllers through Latency-based Analysis},
  journal = j-TECS,
  volume = {17},
  number = {5},
  pages = {90:1--90:??},
  month = nov,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3274281},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:41 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3274281},
  abstract = {We explore techniques to reverse-engineer DRAM
    embedded memory controllers (MCs), including page
    policies, address mapping, and command arbitration.
    There are several benefits to knowing this information:
    They allow tightening worst-case bounds of embedded
    systems and platform-aware optimizations at the
    operating system, source-code, and compiler levels. We
    develop a latency-based analysis, which we use to
    devise algorithms and C programs to extract MC
    properties. We show the effectiveness of the proposed
    approach by reverse-engineering the MC details in the
    XUPV5-LX110T Xilinx platform. Furthermore, to cover a
    breadth of policies, we use a simulation framework and
    document our findings.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {90},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Shukla:2019:EES,
  author = {Sandeep K. Shukla},
  title = {Editorial: Embedded Security Challenge: Cyber Security
    Contests in the Embedded Computing Domain},
  journal = j-TECS,
  volume = {17},
  number = {6},
  pages = {91:1--91:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3293502},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {91},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Sun:2019:DOS,
author = "Hui Sun and Jianzhong Huang and Xiao Qin and
Changsheng Xie",
title = "{DLSpace}: Optimizing {SSD} Lifetime via An Efficient
Distributed Log Space Allocation Strategy",
journal = j-TECS,
volume = "17",
number = "6",
pages = "92:1--92:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3284749",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:42 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3284749",
abstract = "Due to limited numbers of program/erase cycles (i.e.,
P/Es) of NAND Flash, excessive out-of-place update and
erase-before-write operations wear out these P/Es
during garbage collections, which adversely shorten
solid state disk (i.e., SSD) lifetime. The log space in
NAND Flash space of an SSD performs as an updated
page's buffer, which lowers garbage-collection
frequency while reducing consumption of P/Es to extend
SSD lifetime. In this article, we propose DLSpace, a
novel distributed log space allocation strategy named
distributed log space, which divides log space into
block-level log space and page-level log space to
significantly optimize SSD lifetime. DLSpace's log page
space is dedicated to data pages in a data block. Such
log page space only buffers page-update operations in
this data block; thereby the use of log blocks for
postponing garbage collection delays. DLSpace is
conducive to fully utilizing pages in data and log
blocks to avoid erasures of blocks with free pages.
Consequently, DLSpace decreases write amplification by
reducing excessive valid page-rewrite and block-erase
operations under random-write-intensive workloads. We
carried out quantitative research on the extension of
SSD lifetime by virtue of three metrics (i.e., write
amplification, the number of block-erase operations,
and the delay time before the first garbage collection
occurring). Experimental results reveal that compared
with the existing traditional allocation strategy for
log space (i.e., TLSpace), DLSpace reduces write
amplification and the number of erase operations by up
to 55.2\% and 64.1\% to the most extent, respectively.
DLSpace also extends TLSpace's delay time of garbage
collections by 73.3\% to optimize SSD lifetime.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "92",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Balsamo:2019:MPN,
author = "Domenico Balsamo and Benjamin J. Fletcher and Alex S.
Weddell and Giorgos Karatziolas and Bashir M.
Al-Hashimi and Geoff V. Merrett",
title = "Momentum: Power-neutral Performance Scaling with
Intrinsic {MPPT} for Energy Harvesting Computing
Systems",
journal = j-TECS,
volume = "17",
number = "6",
pages = "93:1--93:??",
month = jan,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3281300",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:42 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "Recent research has looked to supplement or even
replace the batteries in embedded computing systems
with energy harvesting, where energy is derived from
the device's environment. However, such supplies are
generally unpredictable and highly variable, and hence
systems typically incorporate large external energy
buffers (e.g., supercapacitors) to sustain computation;
however, these pose environmental issues and increase
system size and cost. This article proposes Momentum, a
general power-neutral methodology, with intrinsic
system-wide maximum power point tracking, that can be
applied to a wide range of different computing systems,
where the system dynamically scales its performance
(and hence power consumption) to optimize computational
progress depending on the power availability. Momentum
enables the system to operate around an efficient
operating voltage, maximizing forward application
execution, without adding any external tracking or
control units. This methodology combines at runtime (1)
a hierarchical control strategy that utilizes available
power management controls (such as dynamic voltage and
frequency scaling, and core hot-plugging) to achieve
efficient power-neutral operation; (2) a software-based
maximum power point tracking scheme (unlike existing
approaches, this does not require any additional
hardware), which adapts the system power consumption so
that it can work at the optimal operating voltage,
considering the efficiency of the entire system rather
than just the energy harvester; and (3) experimental
validation on two different scales of computing system:
a low power microcontroller (operating from the
already-present 4.7 $ \mu $F decoupling capacitance)
and a multi-processor system-on-chip (operating from
15.4 mF added capacitance). Experimental results from
both a controlled supply and energy harvesting source
show that Momentum operates correctly on both platforms
and exhibits improvements in forward application
execution of up to 11\% when compared to existing
power-neutral approaches and 46\% compared to existing
static approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "93",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sheikh:2019:EEM,
  author = {Saad Zia Sheikh and Muhammad Adeel Pasha},
  title = {Energy-Efficient Multicore Scheduling for Hard
    Real-Time Systems: a Survey},
  journal = j-TECS,
  volume = {17},
  number = {6},
  pages = {94:1--94:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3291387},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {As real-time embedded systems are evolving in scale
    and complexity, the demand for a higher performance at
    a minimum energy consumption has become a necessity.
    Consequently, many embedded systems are now adopting
    multicore architectures into their design. However,
    scheduling on multicores is not a trivial task and
    scheduling to minimize the energy consumption further
    increases the complexity of the problem. This problem
    is especially aggravated for hard real-time systems
    where failure to meet a deadline can be catastrophic.
    Such scheduling algorithms yearn for a polynomial time
    complexity for the task-to-core assignment problem with
    an objective to minimize the overall energy
    consumption. There is now a trend toward heterogeneous
    multicores where cores differ in power, performance,
    and architectural capabilities. The desired performance
    and energy consumption is attained by assigning a task
    to the core that is best suited for it. In this
    article, we present a survey on energy-efficient
    multicore scheduling algorithms for hard real-time
    systems. We summarize various algorithms reported in
    the literature and classify them based on Partitioned,
    Semi-Partitioned, and Global scheduling techniques for
    both homogeneous and heterogeneous multicores. We also
    present a detailed discussion on various open issues
    within this domain.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {94},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Xie:2019:EWA,
  author = {Guoqi Xie and Gang Zeng and Ryo Kurachi and Hiroaki
    Takada and Renfa Li and Keqin Li},
  title = {Exact {WCRT} Analysis for Message-Processing Tasks on
    Gateway-Integrated In-Vehicle {CAN} Clusters},
  journal = j-TECS,
  volume = {17},
  number = {6},
  pages = {95:1--95:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3284178},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {A typical automotive integrated architecture is a
    controller area network (CAN) cluster integrated by a
    central gateway. This study proposes a novel and exact
    worst-case response time (WCRT) analysis method for
    message-processing tasks in the gateway. We first
    propose a round search method to obtain lower bound on
    response time (LBRT) and upper bound on response time
    (UBRT), respectively. We then obtain the exact WCRT
    belonging to the scope of the LBRT and UBRT with an
    effective non-exhaustive exploration. Experimental
    results on a real CAN message set reveal that the
    proposed exact analysis method can reduce 99.99999\%
    combinations on large-scale CAN clusters.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {95},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Feng:2019:EUH,
  author =       "Zhiwei Feng and Nan Guan and Mingsong Lv and Weichen
                 Liu and Qingxu Deng and Xue Liu and Wang Yi",
  title =        "An Efficient {UAV} Hijacking Detection Method Using
                 Onboard Inertial Measurement Unit",
  journal =      j-TECS,
  volume =       "17",
  number =       "6",
  pages =        "96:1--96:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3289390",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3289390",
  abstract =     "With the fast growth of civil drones, their security
                 problems meet significant challenges. A commercial
                 drone may be hijacked by a GPS-spoofing attack for
                 illegal activities, such as terrorist attacks. The
                 target of this article is to develop a technique that
                 only uses onboard gyroscopes to determine whether a
                 drone has been hijacked. Ideally, GPS data and the
                 angular velocities measured by gyroscopes can be used
                 to estimate the acceleration of a drone, which can be
                 further compared with the measurement of the
                 accelerometer to detect whether a drone has been
                 hijacked. However, the detection results may not always
                 be accurate due to some calculation and measurement
                 errors, especially when no hijacking occurs in curve
                 trajectory situations. To overcome this, in this
                 article, we propose a novel and simple method to detect
                 hijacking only based on gyroscopes' measurements and
                 GPS data, without using any accelerometer in the
                 detection procedure. The computational complexity of
                 our method is very low, which is suitable to be
                 implemented in the drones with micro-controllers. On
                 the other hand, the proposed method does not rely on
                 any accelerometer to detect attacks, which means it
                 receives less information in the detection procedure
                 and may reduce the results accuracy in some special
                 situations. While the previous method can compensate
                 for this flaw, the high detection results also can be
                 guaranteed by using the above two methods. Experiments
                 with a quad-rotor drone are conducted to show the
                 effectiveness of the proposed method and the
                 combination method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "96",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Yan:2019:CAR,
  author =       "Yin Yan and Girish Gokul and Karthik Dantu and Steven
                 Y. Ko and Lukasz Ziarek and Jan Vitek",
  title =        "Can {Android} Run on Time? {Extending} and Measuring
                 the {Android} Platform's Timeliness",
  journal =      j-TECS,
  volume =       "17",
  number =       "6",
  pages =        "97:1--97:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3289257",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Time predictability is difficult to achieve in the
                 complex, layered execution environments that are common
                 in modern embedded devices such as smartphones. We
                 explore adopting the Android programming model for a
                 range of embedded applications that extends beyond
                 mobile devices, under the constraint that changes to
                 widely used libraries should be minimized. The
                 challenges we explore include the interplay between
                 real-time activities and the rest of the system, how to
                 express the timeliness requirements of components, and
                 how well those requirements can be met on stock
                 embedded platforms. We detail the design and
                 implementation of our modifications to the Android
                 framework along with a real-time VM and OS, and we
                 provide experimental data validating feasibility over
                 five applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "97",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Abkenar:2019:GRU,
  author =       "Amin B. Abkenar and Seng W. Loke and Arkady Zaslavsky
                 and Wenny Rahayu",
  title =        "{GroupSense}: Recognizing and Understanding Group
                 Physical Activities using Multi-Device Embedded
                 Sensing",
  journal =      j-TECS,
  volume =       "17",
  number =       "6",
  pages =        "98:1--98:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3295747",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Human activity recognition using embedded mobile and
                 embedded sensors is becoming increasingly important.
                 Scaling up from individuals to groups, that is, Group
                 Activity Recognition (GAR), has attracted significant
                 attention recently. This article proposes a model and
                 modeling language for GAR called GroupSense-L and a
                 novel distributed middleware called GroupSense for
                 mobile GAR. We implemented and tested GroupSense using
                 smartphone sensors, smartwatch sensors, and embedded
                 sensors in things, where we have a protocol for these
                 different devices to exchange information required for
                 GAR. A range of continuous group activities (from
                 simple to fairly complex) illustrates our approach and
                 demonstrates the feasibility of our model and richness
                 of the proposed specialization. We then conclude with
                 lessons learned for GAR and future work.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Derler:2019:GES,
  author =       "Patricia Derler and Klaus Schneider and Jean-Pierre
                 Talpin",
  title =        "Guest Editorial: Special Issue of {ACM TECS on the
                 ACM--IEEE International Conference on Formal Methods
                 and Models for System Design (MEMOCODE 2017)}",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3292422",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3292422",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% NOTE: this editorial is article "1e" of volume 18, number 1; the pages
%%% field carries the same "1e" prefix so that it is not confused with
%%% article 1 of the same issue (Derler:2019:GES).
@Article{Shukla:2019:EHF,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Human Factors in Embedded Computing",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "1e:1--1e:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3302888",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3302888",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Nuzzo:2019:SAG,
  author =       "Pierluigi Nuzzo and Jiwei Li and Alberto L.
                 Sangiovanni-Vincentelli and Yugeng Xi and Dewei Li",
  title =        "Stochastic Assume--Guarantee Contracts for
                 Cyber-Physical System Design",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3243216",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3243216",
  abstract =     "We present an assume-guarantee contract framework for
                 cyber-physical system design under probabilistic
                 requirements. Given a stochastic linear system and a
                 set of requirements captured by bounded Stochastic
                 Signal Temporal Logic (StSTL) contracts, we propose
                 algorithms to check contract compatibility,
                 consistency, and refinement, and generate a sequence of
                 control inputs that satisfies a contract. We leverage
                 encodings of the verification and control synthesis
                 tasks into mixed integer optimization problems, and
                 conservative approximations of probabilistic
                 constraints that produce sound and tractable problem
                 formulations. We illustrate the effectiveness of our
                 approach on three case studies, including the design of
                 controllers for aircraft power distribution networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Plassan:2019:MMA,
  author =       "Guillaume Plassan and Katell Morin-Allory and
                 Dominique Borrione",
  title =        "Mining Missing Assumptions from Counter-Examples",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3288759",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3288759",
  abstract =     "During the formal functional verification of
                 Register-Transfer Level designs, a false failure is
                 often observed. Most of the time, this failure is
                 caused by an underconstrained model. The analysis of
                 the root cause for the verification error and the
                 creation of missing assumptions are a significant time
                 burden. In this article, we present a methodology to
                 automatically mine these missing assumptions from
                 counter-examples. First, multiple counter-examples are
                 generated for the same property. Then, relevant
                 behaviors are mined from the counter-examples. Finally,
                 corresponding assumptions are filtered and a small
                 amount is returned to the user for review.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Fellner:2019:MBM,
  author =       "Andreas Fellner and Willibald Krenn and Rupert Schlick
                 and Thorsten Tarrach and Georg Weissenbacher",
  title =        "Model-based, Mutation-driven Test-case Generation Via
                 Heuristic-guided Branching Search",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3289256",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3289256",
  abstract =     "This work introduces a heuristic-guided branching
                 search algorithm for model-based, mutation-driven
                 test-case generation. The algorithm is designed towards
                 the efficient and computationally tractable exploration
                 of discrete, non-deterministic models with huge state
                 spaces. Asynchronous parallel processing is a key
                 feature of the algorithm. The algorithm is inspired by
                 the successful path planning algorithm Rapidly
                 exploring Random Trees (RRT). We adapt RRT in several
                 aspects towards test-case generation. Most notably, we
                 introduce parametrized heuristics for start and
                 successor state selection, as well as a mechanism to
                 construct test cases from the data produced during the
                 search. We implemented our algorithm in the existing
                 test-case generation framework MoMuT. We present an
                 extensive evaluation of the proposed heuristics and
                 parameters of the algorithm, based on a diverse set of
                 demanding models obtained in an industrial context. In
                 total, we continuously utilized 128 CPU cores on three
                 servers for several weeks to gather the experimental
                 data presented. We show that branching search works
                 well and the use of multiple heuristics is justified.
                 With our new algorithm, we are now able to process
                 models consisting of over 2,300 concurrent objects. To
                 our knowledge, there is no other mutation-driven
                 test-case generation tool that is able to process
                 models of this magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Edwards:2019:CDC,
  author =       "Stephen A. Edwards and Richard Townsend and Martha
                 Barker and Martha A. Kim",
  title =        "Compositional Dataflow Circuits",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3274280",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3274280",
  abstract =     "We present a technique for implementing dataflow
                 networks as compositional hardware circuits. We first
                 define an abstract dataflow model with unbounded
                 buffers that supports data-dependent blocks (mux,
                 demux, and nondeterministic merge); we then show how to
                 faithfully implement such networks with bounded buffers
                 and handshaking. Handshaking admits compositionality:
                 our circuits can be connected with or without buffers,
                 and combinational cycles arise only from a completely
                 unbuffered cycle. While bounding buffer sizes can cause
                 the system to deadlock prematurely, the system is
                 guaranteed to produce the same, correct, data before
                 then. Thus, unless the system deadlocks, inserting or
                 removing buffers only affects its performance. We
                 demonstrate how this enables design space to be
                 explored.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Reynolds:2019:MME,
  author =       "Thomas N. Reynolds and Adam Procter and William L.
                 Harrison and Gerard Allwein",
  title =        "The Mechanized Marriage of Effects and Monads with
                 Applications to High-assurance Hardware",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3274282",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3274282",
  abstract =     "Constructing high-assurance, secure hardware remains a
                 challenge, because to do so relies on both a verifiable
                 means of hardware description and implementation.
                 However, production hardware description languages
                 (HDL) lack the formal underpinnings required by formal
                 methods in security. Still, there is no such thing as
                 high-assurance systems without high-assurance hardware.
                 We present a core calculus of secure hardware
                 description with its formal semantics, security type
                 system, and mechanization in Coq. This calculus is the
                 core of the functional HDL, ReWire, shown in previous
                 work to have useful applications in reconfigurable
                 computing. This work supports a full-fledged, formal
                 methodology for producing high-assurance hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Chattopadhyay:2019:QIL,
  author =       "Sudipta Chattopadhyay and Moritz Beck and Ahmed Rezine
                 and Andreas Zeller",
  title =        "Quantifying the Information Leakage in Cache Attacks
                 via Symbolic Execution",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3288758",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3288758",
  abstract =     "Cache attacks allow attackers to infer the properties
                 of a secret execution by observing cache hits and
                 misses. But how much information can actually leak
                 through such attacks? For a given program, a cache
                 model, and an input, our CHALICE framework leverages
                 symbolic execution to compute the amount of information
                 that can possibly leak through cache attacks. At the
                 core of CHALICE is a novel approach to quantify
                 information leakage that can highlight critical cache
                 side-channel leakage on arbitrary binary code. In our
                 evaluation on real-world programs from OpenSSL and
                 Linux GDK libraries, CHALICE effectively quantifies
                 information leakage: For an AES-128 implementation on
                 Linux, for instance, CHALICE finds that a cache attack
                 can leak as much as 127 out of 128 bits of the
                 encryption key.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Park:2019:ERR,
  author =       "Taeju Park and Kang G. Shin",
  title =        "{EACAN}: Reliable and Resource-Efficient {CAN}
                 Communications",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301309",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301309",
  abstract =     "Worst-case-based timing verification for the
                 controller area network (CAN) has been the bottleneck
                 to efficient use of its bandwidth. Especially, this
                 inefficiency comes from the worst-case transmission
                 error rate (WCTER) when transmission errors are
                 accounted for. To alleviate this inefficiency, we
                 propose a runtime adaptation scheme, error-adaptive CAN
                 (EACAN). EACAN observes the behavior of transmission
                 errors at runtime, and reconfigures the message period
                 based on the observation to meet the timing-failure
                 requirement. We experimentally evaluate the bandwidth
                 utilization of both EACAN- and WCTER-based
                 verification, showing that the former improves the
                 bandwidth utilization by 14\% over the latter.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Pederson:2019:BCL,
  author =       "Daniel J. Pederson and Christopher J. Quinkert and
                 Muhammad A. Arafat and Jesse P. Somann and Jack D.
                 Williams and Rebecca A. Bercich and Zhi Wang and
                 Gabriel O. Albors and John G. R. Jefferys and Pedro P.
                 Irazoqui",
  title =        "The {Bionode}: a Closed-Loop Neuromodulation Implant",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301310",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301310",
  abstract =     "Implantable closed-loop neuromodulation devices for
                 use in long-term chronic studies in a lab or clinical
                 trial are expensive to acquire and difficult to modify
                 for specific use cases. This article documents the
                 design and fabrication of a wireless implantable device
                 using only commercially available off-the-shelf (COTS)
                 components. This device, called the Bionode, can record
                 and transmit up to four channels of biopotential data
                 while simultaneously providing biphasic
                 constant-current stimulation. The Bionode is a viable,
                 low-cost, reusable, and easily modifiable research tool
                 with clinical implications that has gained widespread
                 use in various research projects at Purdue
                 University.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Venkataramani:2019:SMM,
  author =       "Vanchinathan Venkataramani and Mun Choon Chan and
                 Tulika Mitra",
  title =        "Scratchpad-Memory Management for Multi-Threaded
                 Applications on Many-Core Architectures",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301308",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301308",
  abstract =     "Contemporary many-core architectures, such as Adapteva
                 Epiphany and Sunway TaihuLight, employ per-core
                 software-controlled Scratchpad Memory (SPM) rather than
                 caches for better performance-per-watt and
                 predictability. In these architectures, a core is
                 allowed to access its own SPM as well as remote SPMs
                 through the Network-On-Chip (NoC). However, the
                 compiler/programmer is required to explicitly manage
                 the movement of data between SPMs and off-chip memory.
                 Utilizing SPMs for multi-threaded applications is even
                 more challenging, as the shared variables across the
                 threads need to be placed appropriately. Accessing
                 variables from remote SPMs with higher access latency
                 further complicates this problem as certain links in
                 the NoC may be heavily contended by multiple threads.
                 Therefore, certain variables may need to be replicated
                 in multiple SPMs to reduce the contention delay and/or
                 the overall access time. We present Coordinated Data
                 Management (CDM), a compile-time framework that
                 automatically identifies shared/private variables and
                 places them with replication (if necessary) to suitable
                 on-chip or off-chip memory, taking NoC contention into
                 consideration. We develop both an exact Integer Linear
                 Programming (ILP) formulation as well as an iterative,
                 scalable algorithm for placing the data variables in
                 multi-threaded applications on many-core SPMs.
                 Experimental evaluation on the Parallella hardware
                 platform confirms that our allocation strategy reduces
                 the overall execution time and energy consumption by $
                 1.84 \times $ and $ 1.83 \times $, respectively, when
                 compared to the existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Rhisheekesan:2019:CFC,
  author =       "Abhishek Rhisheekesan and Reiley Jeyapaul and Aviral
                 Shrivastava",
  title =        "Control Flow Checking or Not? (for Soft Errors)",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301311",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301311",
  abstract =     "Huge leaps in performance and power improvements of
                 computing systems are driven by rapid technology
                 scaling, but technology scaling has also rendered
                 computing systems susceptible to soft errors. Among the
                 soft error protection techniques, Control Flow Checking
                 (CFC) based techniques have gained a reputation of
                 being lightweight yet effective. The main idea behind
                 CFCs is to check if the program is executing the
                 instructions in the right order. In order to validate
                 the protection claims of existing CFCs, we develop a
                 systematic and quantitative method to evaluate the
                 protection achieved by CFCs using the metric of
                 vulnerability. Our quantitative analysis indicates that
                 existing CFC techniques are not only ineffective in
                 providing protection from soft faults, but incur
                 additional performance and power overheads. Our results
                 show that software-only CFC protection schemes increase
                 system vulnerability by 18\%--21\% with 17\%--38\%
                 performance overhead and hybrid CFC protection
                 increases vulnerability by 5\%. Although the
                 vulnerability remains almost the same for hardware-only
                 CFC protection, they incur overheads of design cost,
                 area, and power due to the hardware modifications
                 required for their implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Roy:2019:CPR,
  author =       "Debapriya Basu Roy and Shivam Bhasin and Ivica
                 Nikoli{\'c} and Debdeep Mukhopadhyay",
  title =        "Combining {PUF} with {RLUTs}: a Two-party
                 Pay-per-device {IP} Licensing Scheme on {FPGAs}",
  journal =      j-TECS,
  volume =       "18",
  number =       "2",
  pages =        "12:1--12:??",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301307",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301307",
  abstract =     "With the popularity of modern FPGAs, the business of
                 FPGA specific intellectual properties (IP) is expanding
                 rapidly. This also brings in the concern of IP
                 protection. FPGA vendors are making serious efforts
                 toward IP protection, leading to standardization
                 schemes like IEEE P1735. However, efficient techniques
                 to prevent unauthorized overuse of IP still remain an
                 open question. In this article, we propose a two-party
                 IP protection scheme combining the re-configurable
                 look-up table primitive of modern FPGAs with physically
                 unclonable functions (PUF). The proposed scheme works
                 with the assumption that the FPGA vendor provides the
                 assurance of confidentiality and integrity of the
                 developed IP. The proposed scheme is considerably
                 lightweight compared to existing schemes, prevents
                 overuse, and does not involve FPGA vendors or trusted
                 third parties for IP licensing. The validation of the
                 proposed scheme is done on MCNC'91 benchmark and
                 third-party IPs like AES and lightweight MIPS
                 processors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhong:2019:SHS,
  author =       "Guanwen Zhong and Akshat Dubey and Cheng Tan and
                 Tulika Mitra",
  title =        "{Synergy}: an {HW\slash SW} Framework for High
                 Throughput {CNNs} on Embedded Heterogeneous {SoC}",
  journal =      j-TECS,
  volume =       "18",
  number =       "2",
  pages =        "13:1--13:??",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301278",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301278",
  abstract =     "Convolutional Neural Networks (CNN) have been widely
                 deployed in diverse application domains. There has been
                 significant progress in accelerating both their
                 training and inference using high-performance GPUs,
                 FPGAs, and custom ASICs for datacenter-scale
                 environments. The recent proliferation of mobile and
                 Internet of Things (IoT) devices have necessitated
                 real-time, energy-efficient deep neural network
                 inference on embedded-class, resource-constrained
                 platforms. In this context, we present Synergy, an
                 automated, hardware-software co-designed, pipelined,
                 high-throughput CNN inference framework on embedded
                 heterogeneous system-on-chip (SoC) architectures
                 (Xilinx Zynq). Synergy leverages, through
                 multi-threading, all the available on-chip resources,
                 which includes the dual-core ARM processor along with
                 the FPGA and the NEON Single-Instruction Multiple-Data
                 (SIMD) engines as accelerators. Moreover, Synergy
                 provides a unified abstraction of the heterogeneous
                 accelerators (FPGA and NEON) and can adapt to different
                 network configurations at runtime without changing the
                 underlying hardware accelerator architecture by
                 balancing workload across accelerators through
                 work-stealing. Synergy achieves 7.3X speedup, averaged
                 across seven CNN models, over a well-optimized
                 software-only solution. Synergy demonstrates
                 substantially better throughput and energy-efficiency
                 compared to the contemporary CNN implementations on the
                 same SoC architecture.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Guha:2019:SBS,
  author =       "Krishnendu Guha and Debasri Saha and Amlan
                 Chakrabarti",
  title =        "Stigmergy-Based Security for {SoC} Operations From
                 Runtime Performance Degradation of {SoC} Components",
  journal =      j-TECS,
  volume =       "18",
  number =       "2",
  pages =        "14:1--14:??",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301279",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301279",
  abstract =     "The semiconductor design industry of the embedded era
                 has embraced the globalization strategy for system on
                 chip (SoC) design. This involves incorporation of
                 various SoC components or intellectual properties
                 (IPs), procured from various third-party IP (3PIP)
                 vendors. However, trust of an SoC is challenged when a
                 supplied IP is counterfeit or implanted with a Hardware
                 Trojan Horse. Both roots of untrust may result in
                 sudden performance degradation at runtime. None of the
                 existing hardware security approaches organize the
                 behavior of the IPs at the low level, to ensure timely
                 completion of SoC operations. However, real-time SoC
                 operations are always associated with a deadline, and a
                 deadline miss due to sudden performance degradation of
                 any of the IPs may jeopardize mission-critical
                 applications. We seek refuge to the stigmergic behavior
                 exhibited in insect colonies to propose a decentralized
                 self-aware security approach. The self-aware security
                 modules attached with each IP works based on the
                 Observe-Decide-Act paradigm and not only detects
                 vulnerability but also organizes behavior of the IPs
                 dynamically at runtime so that the high-level objective
                 of task completion before a deadline is ensured.
                 Experimental validation and low overhead of our
                 proposed security modules over various benchmark IPs
                 and crypto SoCs depict the prospects of our proposed
                 mechanism.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ahmed:2019:CRU,
  author =       "Alif Ahmed and Yuanwen Huang and Prabhat Mishra",
  title =        "Cache Reconfiguration Using Machine Learning for
                 Vulnerability-aware Energy Optimization",
  journal =      j-TECS,
  volume =       "18",
  number =       "2",
  pages =        "15:1--15:??",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3309762",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3309762",
  abstract =     "Dynamic cache reconfiguration has been widely explored
                 for energy optimization and performance improvement for
                 single-core systems. Cache partitioning techniques are
                 introduced for the shared cache in multicore systems to
                 alleviate inter-core interference. While these
                 techniques focus only on performance and energy, they
                 ignore vulnerability due to soft errors. In this
                 article, we present a static profiling based algorithm
                 to enable vulnerability-aware energy-optimization for
                 real-time multicore systems. Our approach can
                 efficiently search the space of cache configurations
                 and partitioning schemes for energy optimization while
                 task deadlines and vulnerability constraints are
                 satisfied. A machine learning technique has been
                 employed to minimize the static profiling time without
                 sacrificing the accuracy of results. Our experimental
                 results demonstrate that our approach can achieve
                 19.2\% average energy savings compared with the base
                 configuration, while drastically reducing the
                 vulnerability (49.3\% on average) compared to
                 state-of-the-art techniques. Furthermore, the machine
                 learning technique enabled more than 10x speedup in
                 static profiling time with a negligible prediction
                 error of 3\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Lentaris:2019:SMF,
author = "George Lentaris and Konstantinos Maragos and Dimitrios
Soudris and Xenophon Zabulis and Manolis Lourakis",
title = "Single- and Multi-{FPGA} Acceleration of Dense Stereo
Vision for Planetary Rovers",
journal = j-TECS,
volume = "18",
number = "2",
pages = "16:1--16:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3312743",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3312743",
abstract = "Increased mobile autonomy is a vital requisite for
future planetary exploration rovers. Stereo vision is a
key enabling technology in this regard, as it can
passively reconstruct in three dimensions the
surroundings of a rover and facilitate the selection of
science targets and the planning of safe routes.
Nonetheless, accurate dense stereo algorithms are
computationally demanding. When executed on the
low-performance, radiation-hardened CPUs typically
installed on rovers, slow stereo processing severely
limits the driving speed and hence the science that can
be conducted in situ. Aiming to decrease execution time
while increasing the accuracy of stereo vision embedded
in future rovers, this article proposes HW/SW co-design
and acceleration on resource-constrained, space-grade
FPGAs. In a top-down approach, we develop a stereo
algorithm based on the space sweep paradigm, design its
parallel HW architecture, implement it with VHDL, and
demonstrate feasible solutions even on small-sized
devices with our multi-FPGA partitioning methodology.
To meet all cost, accuracy, and speed requirements set
by the European Space Agency for this system, we
customize our HW/SW co-processor by design space
exploration and testing on a Mars-like dataset.
Implemented on Xilinx Virtex technology, or European
NG-MEDIUM devices, the FPGA kernel processes a $ 1{,}120
\times 1{,}120 $ stereo pair in 1.7s--3.1s, utilizing
only 5.4--9.3 LUT6 and 200--312 RAMB18. The proposed
system exhibits up to $ 32 \times $ speedup over
desktop CPUs, or $ 2{,}810 \times $ over space-grade
LEON3, and achieves a mean reconstruction error less
than 2cm up to 4m depth. Excluding errors exceeding 2cm
(which are less than 4\% of the total), the mean error
is under 8mm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Harb:2019:FIE,
author = "Salah Harb and Moath Jarrah",
title = "{FPGA} Implementation of the {ECC} Over {$ {\rm
GF}(2^m) $} for Small Embedded Applications",
journal = j-TECS,
volume = "18",
number = "2",
pages = "17:1--17:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3310354",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3310354",
abstract = "In this article, we propose a compact elliptic curve
cryptographic core over GF($ 2^m$). The proposed
architecture is based on the Lopez-Dahab projective
point arithmetic operations. To achieve efficiency in
resources usage, an iterative method that uses a
ROM-based state machine is developed for the elliptic
curve cryptography (ECC) point doubling and addition
operations. The compact ECC core has been implemented
using Virtex FPGA devices. The number of the required
slices is 2,102 at 321MHz and 6,738 slices at 262MHz
for different GF($ 2^m$). Extensive experiments were
conducted to compare our solution to existing methods
in the literature. Our compact core consumes less area
than all previously proposed methods. It also provides
an excellent performance for scalar multiplication. In
addition, the ECC core is implemented in ASIC 0.18 $
\mu $m CMOS technology, and the results show excellent
performance. Therefore, our proposed ECC core method
provides a balance in terms of speed, area, and power
consumption. This makes the proposed design the right
choice for cryptosystems in limited-resource devices
such as cell phones, IP cores of SoCs, and smart cards.
Moreover, side-channel attack resistance is implemented
to prevent power analysis.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Arghavani:2019:CLB,
author = "Abbas Arghavani and Haibo Zhang and Zhiyi Huang and
Yawen Chen",
title = "{Chimp}: a Learning-based Power-aware Communication
Protocol for Wireless Body Area Networks",
journal = j-TECS,
volume = "18",
number = "2",
pages = "18:1--18:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3309763",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3309763",
abstract = "Radio links in wireless body area networks (WBANs)
commonly experience highly time-varying attenuation due
to the dynamic network topology and frequent occlusions
caused by body movements, making it challenging to
design a reliable, energy-efficient, and real-time
communication protocol for WBANs. In this article, we
present Chimp, a learning-based power-aware
communication protocol in which each sending node can
self-learn the channel quality and choose the best
transmission power level to reduce energy consumption
and interference range while still guaranteeing high
communication reliability. Chimp is designed based on
learning automata that uses only the acknowledgment
packets and motion data from a local gyroscope sensor
to infer the real-time channel status. We design a new
cost function that takes into account the energy
consumption, communication reliability and interference
and develop a new learning function that can guarantee
to select the optimal transmission power level to
minimize the cost function for any given channel
quality. For highly dynamic postures such as walking
and running, we exploit the correlation between channel
quality and motion data generated by a gyroscope sensor
to fastly estimate channel quality, eliminating the
need to use expensive channel sampling procedures. We
evaluate the performance of Chimp through experiments
using TelosB motes equipped with the MPU-9250 motion
sensor chip and compare it with the state-of-the-art
protocols in different body postures. Experimental
results demonstrate that Chimp outperforms existing
schemes and works efficiently in most common body
postures. In high-data-rate scenarios, it achieves
almost the same performance as the optimal power
assignment scheme in which the optimal power level for
each transmission is calculated based on the collected
channel measurements in an off-line manner.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jiang:2019:BSR,
author = "Zhe Jiang and Neil Audsley and Pan Dong",
title = "{BlueIO}: a Scalable Real-Time Hardware {I/O}
Virtualization System for Many-core Embedded Systems",
journal = j-TECS,
volume = "18",
number = "3",
pages = "19:1--19:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3309765",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3309765",
abstract = "In safety-critical systems, time predictability is
vital. This extends to I/O operations that require
predictability, timing-accuracy, parallel access,
scalability, and isolation. Currently, existing
approaches cannot achieve all these requirements at the
same time. In this article, we propose a framework of
hardware framework for real-time I/O
virtualization---termed BlueIO---to meet all these
requirements simultaneously. BlueIO integrates the
functionalities of I/O virtualization, low-layer I/O
drivers, and a clock cycle level timing-accurate I/O
controller (using the GPIOCP [36]). BlueIO provides
this functionality in the hardware layer, supporting
abstract virtualized access to I/O from the software
domain. The hardware implementation includes I/O
virtualization and I/O drivers, provides isolation and
parallel (concurrent) access to I/O operations, and
improves I/O performance. Furthermore, the approach
includes the previously proposed GPIOCP to guarantee
that I/O operations will occur at a specific clock
cycle (i.e., be timing-accurate and predictable). In
this article, we present a hardware consumption
analysis of BlueIO to show that it linearly scales with
the number of CPUs and I/O devices, which is evidenced
by our implementation in VLSI and FPGA. We also
describe the design and implementation of BlueIO and
demonstrate how a BlueIO-based system can be exploited
to meet real-time requirements with significant
improvements in I/O performance and a low running cost
on different OSs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2019:ERH,
author = "Sandeep K. Shukla",
title = "Editorial: Reflections on the History of
Cyber-Physical versus Embedded Systems",
journal = j-TECS,
volume = "18",
number = "3",
pages = "19e:1--19e:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3325115",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3325115",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19e",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tabrizi:2019:DLC,
author = "Farid Molazem Tabrizi and Karthik Pattabiraman",
title = "Design-Level and Code-Level Security Analysis of {IoT}
Devices",
journal = j-TECS,
volume = "18",
number = "3",
pages = "20:1--20:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3310353",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3310353",
abstract = "The Internet of Things (IoT) is playing an important
role in different aspects of our lives. Smart grids,
smart cars, and medical devices all incorporate IoT
devices as key components. The ubiquity and criticality
of these devices make them an attractive target for
attackers. Therefore, we need techniques to analyze
their security so that we can address their potential
vulnerabilities. IoT devices, unlike remote servers,
are user-facing and, therefore, an attacker may
interact with them more extensively, e.g., via physical
access. Existing techniques for analyzing security of
IoT devices either rely on a pre-defined set of attacks
and, therefore, have limited effect or do not consider
the specific capabilities the attackers have against
IoT devices. Security analysis techniques may operate
at the design-level, leveraging abstraction to avoid
state-space explosion, or at the code-level for
ensuring accuracy. In this article, we introduce two
techniques, one at the design-level, and the other at
the code-level, to analyze security of IoT devices, and
compare their effectiveness. The former technique uses
model checking, while the latter uses symbolic
execution, to find attacks based on the attacker's
capabilities. We evaluate our techniques on an open
source smart meter. We find that our code-level
analysis technique is able to find three times more
attacks and complete the analysis in half the time,
compared to the design-level analysis technique, with
no false positives.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Belson:2019:SAP,
author = "Bruce Belson and Jason Holdsworth and Wei Xiang and
Bronson Philippa",
title = "A Survey of Asynchronous Programming Using Coroutines
in the {Internet of Things} and Embedded Systems",
journal = j-TECS,
volume = "18",
number = "3",
pages = "21:1--21:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3319618",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3319618",
abstract = "Many Internet of Things and embedded projects are
event driven, and therefore require asynchronous and
concurrent programming. Current proposals for C++20
suggest that coroutines will have native language
support. It is timely to survey the current use of
coroutines in embedded systems development. This
article investigates existing research which uses or
describes coroutines on resource-constrained platforms.
The existing research is analysed with regard to:
software platform, hardware platform, and capacity; use
cases and intended benefits; and the application
programming interface design used for coroutines. A
systematic mapping study was performed, to select
studies published between 2007 and 2018 which contained
original research into the application of coroutines on
resource-constrained platforms. An initial set of 566
candidate papers, collated from on-line databases, were
reduced to only 35 after filters were applied,
revealing the following taxonomy. The C \& C++
programming languages were used by 22 studies out of
35. As regards hardware, 16 studies used 8- or 16-bit
processors while 13 used 32-bit processors. The four
most common use cases were concurrency (17 papers),
network communication (15), sensor readings (9), and
data flow (7). The leading intended benefits were code
style and simplicity (12 papers), scheduling (9), and
efficiency (8). A wide variety of techniques have been
used to implement coroutines, including native macros,
additional tool chain steps, new language features, and
non-portable assembly language. We conclude that there
is widespread demand for coroutines on
resource-constrained devices. Our findings suggest that
there is significant demand for a formalised, stable,
well-supported implementation of coroutines in C++,
designed with consideration of the special needs of
resource-constrained devices, and further that such an
implementation would bring benefits specific to such
devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Batina:2019:ISI,
author = "Lejla Batina and Sherman S. M. Chow and Gerhard Hancke
and Zhe Liu",
title = "Introduction to the Special Issue on Cryptographic
Engineering for {Internet of Things}: Security
Foundations, Lightweight Solutions, and Attacks",
journal = j-TECS,
volume = "18",
number = "3",
pages = "22:1--22:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3322641",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3322641",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhou:2019:LIN,
author = "Lu Zhou and Chunhua Su and Zhi Hu and Sokjoon Lee and
Hwajeong Seo",
title = "Lightweight Implementations of {NIST P-256} and {SM2
ECC} on $8$-bit Resource-Constraint Embedded Device",
journal = j-TECS,
volume = "18",
number = "3",
pages = "23:1--23:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3236010",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3236010",
abstract = "Elliptic Curve Cryptography (ECC) now is one of the
most important approaches to instantiate asymmetric
encryption and signature schemes, which has been
extensively exploited to protect the security of
cyber-physical systems. With the advent of the Internet
of Things (IoT), a great deal of constrained devices
may require software implementations of ECC operations.
Under these circumstances, the SM2, a set of public key
cryptographic algorithms based on elliptic curves
published by Chinese Commercial Cryptography
Administration Office, was standardized at ISO in 2017
to enhance the cyber-security. However, few research
works on the implementation of SM2 for constrained
devices have been conducted. In this work, we fill this
gap and propose our efficient, secure, and compact
implementation of scalar multiplication on a 256-bit
elliptic curve recommended by the SM2, as well as a
comparison implementation of scalar multiplication on
the same bit-length elliptic curve recommended by NIST.
We re-design some existent techniques to fit the
low-end IoT platform, namely 8-bit AVR processors, and
our implementations evaluated on the desired platform
show that the SM2 algorithms have competitive
efficiency and security with NIST, which would work
well to secure the IoT world.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Turan:2019:CFF,
author = "Furkan Turan and Ingrid Verbauwhede",
title = "Compact and Flexible {FPGA} Implementation of
{Ed25519} and {X25519}",
journal = j-TECS,
volume = "18",
number = "3",
pages = "24:1--24:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3312742",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3312742",
abstract = "This article describes a field-programmable gate array
(FPGA) cryptographic architecture, which combines the
elliptic curve--based Ed25519 digital signature
algorithm and the X25519 key establishment scheme in a
single module. Cryptographically, these are
high-security elliptic curve cryptography algorithms
with short key sizes and impressive execution times in
software. Our goal is to provide a lightweight FPGA
module that enables them on resource-constrained
devices, specifically for Internet of Things (IoT)
applications. In addition, we aim at extensibility with
customisable countermeasures against timing and
differential power analysis side-channel attacks and
fault-injection attacks. For the former, we offer a
choice between time-optimised versus constant-time
execution, with or without $Z$-coordinate randomisation
and base-point blinding; and for the latter, we offer
enabling or disabling default-case statements in the
Finite State Machine (FSM) descriptions. To obtain
compactness and at the same time fast execution times,
we make maximum use of the Digital Signal Processing
(DSP) slices on the FPGA. We designed a single
arithmetic unit that is flexible to support operations
with two moduli and non-modulus arithmetic. In
addition, our design benefits in-place memory
management and the local storage of inputs into DSP
slices' pipeline registers and takes advantage of
distributed memory. These eliminate a memory access
bottleneck. The flexibility is offered by a micro-code
supported instruction-set architecture. Our design
targets 7-Series Xilinx FPGAs and is prototyped on a
Zynq System-on-Chip (SoC). The base design combining
Ed25519 and X25519 in a single module, and its
implementation requires only around 11.1K Lookup Tables
(LUTs), 2.6K registers, and 16 DSP slices. Also, it
achieves performance of 1.6ms for a signature
generation and 3.6ms for a signature verification for a
1024-bit message with an 82MHz clock. Moreover, the
design can be optimised only for X25519, which gives
the most compact FPGA implementation compared to
previously published X25519 implementations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2019:XBL,
author = "Weiqiang Liu and Lei Zhang and Zhengran Zhang and
Chongyan Gu and Chenghua Wang and Maire O'Neill and
Fabrizio Lombardi",
title = "{XOR}-Based Low-Cost Reconfigurable {PUFs} for {IoT}
Security",
journal = j-TECS,
volume = "18",
number = "3",
pages = "25:1--25:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3274666",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3274666",
abstract = "With the rapid development of the Internet of Things
(IoT), security has attracted considerable interest.
Conventional security solutions that have been proposed
for the Internet based on classical cryptography cannot
be applied to IoT nodes as they are typically
resource-constrained. A physical unclonable function
(PUF) is a hardware-based security primitive and can be
used to generate a key online or uniquely identify an
integrated circuit (IC) by extracting its internal
random differences using so-called challenge-response
pairs (CRPs). It is regarded as a promising low-cost
solution for IoT security. A logic reconfigurable PUF
(RPUF) is highly efficient in terms of hardware cost.
This article first presents a new classification for
RPUFs, namely circuit-based RPUF (C-RPUF) and
algorithm-based RPUF (A-RPUF); two Exclusive OR
(XOR)-based RPUF circuits (an XOR-based reconfigurable
bistable ring PUF (XRBR PUF) and an XOR-based
reconfigurable ring oscillator PUF (XRRO PUF)) are
proposed. Both the XRBR and XRRO PUFs are implemented
on Xilinx Spartan-6 field-programmable gate arrays
(FPGAs). The implementation results are compared with
previous PUF designs and show good uniqueness and
reliability. Compared to conventional PUF designs, the
most significant advantage of the proposed designs is
that they are highly efficient in terms of hardware
cost. Moreover, the XRRO PUF is the most efficient
design when compared with previous RPUFs. Also, both
the proposed XRRO and XRBR PUFs require only 12.5\% of
the hardware resources of previous bistable ring PUFs
and reconfigurable RO PUFs, respectively, to generate a
1-bit response. This confirms that the proposed XRBR
and XRRO PUFs are very efficient designs with good
uniqueness and reliability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2019:ESA,
author = "Robert P. Lee and Konstantinos Markantonakis and Raja
Naeem Akram",
title = "Ensuring Secure Application Execution and
Platform-Specific Execution in Embedded Devices",
journal = j-TECS,
volume = "18",
number = "3",
pages = "26:1--26:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3284361",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3284361",
abstract = "The Internet of Things (IoT) is expanding at a large
rate, with devices found in commercial and domestic
settings from industrial sensors to home appliances.
However, as the IoT market grows, so does the number of
attacks made against it with some reports claiming an
increase of 600\% in 2017. This work seeks to prevent
code replacement, injection, and exploitation attacks
by ensuring correct and platform specific application
execution. This combines two previously studied
problems: secure application execution and binding
hardware and software. We present descriptions of both
problems and requirements for ensuring both
simultaneously. We then propose a scheme extending
previous work that meets these requirements, and
describe our implementation of the soft-core Secure
Execution Processor developed and tested on Xilinx
Spartan-6 FPGA. Finally, we analyse the scheme and our
implementation according to performance and the
requirements listed.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cherif:2019:LSD,
author = "Amina Cherif and Malika Belkadi and Damien Sauveron",
title = "A Lightweight and Secure Data Collection Serverless
Protocol Demonstrated in an Active {RFIDs} Scenario",
journal = j-TECS,
volume = "18",
number = "3",
pages = "27:1--27:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3274667",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3274667",
abstract = "In the growing Internet of Things context, thousands
of computing devices with various functionalities are
producing data (from environmental sensors or other
sources). However, they are also collecting, storing,
processing and transmitting data to eventually
communicate them securely to third parties (e.g.,
owners of devices or cloud data storage). The deployed
devices are often battery-powered mobile or static
nodes equipped with sensors and/or actuators, and they
communicate using wireless technologies. Examples
include unmanned aerial vehicles, wireless sensor
nodes, smart beacons, and wearable health objects. Such
resource-constrained devices include Active Radio
Frequency IDentification (RFID) nodes, and these are
used to illustrate our proposal. In most scenarios,
these nodes are unattended in an adverse environment,
so data confidentiality must be ensured from the
sensing phase through to delivery to authorized
entities: in other words, data must be securely stored
and transmitted to prevent attack by active adversaries
even if the nodes are captured. However, due to the
scarce resources available to nodes in terms of energy,
storage, and/or computation, the proposed security
solution has to be lightweight. In this article, we
propose a serverless protocol to enable Mobile Data
Collectors (MDCs), such as drones, to securely collect
data from mobile and static Active RFID nodes and then
deliver them later to an authorized third party. The
whole solution ensures data confidentiality at each
step (from the sensing phase, before data collection by
the MDC, once data have been collected by MDC, and
during final delivery), while fulfilling the
lightweight requirements for the resource-limited
entities involved. To assess the suitability of the
protocol against the performance requirements, it was
implemented on the most resource-constrained devices to
get the worst possible results. In addition, to prove
the protocol fulfills the security requirements, it was
analyzed using security games and also formally
verified using the AVISPA and ProVerif tools.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhou:2019:LCP,
author = "Lu Zhou and Chunhua Su and Kuo-Hui Yeh",
title = "A Lightweight Cryptographic Protocol with
Certificateless Signature for the {Internet of
Things}",
journal = j-TECS,
volume = "18",
number = "3",
pages = "28:1--28:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3301306",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:43 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3301306",
abstract = "The universality of smart-devices has brought rapid
development and the significant advancement of
ubiquitous applications for the Internet of Things
(IoT). Designing new types of IoT-compatible
cryptographic protocols has become a more popular way
to secure IoT-based applications. Significant attention
has been dedicated to the challenge of implementing a
lightweight and secure cryptographic protocol for IoT
devices. In this study, we propose a lightweight
cryptographic protocol integrating certificateless
signature and bilinear pairing crypto-primitives. In
the proposed protocol, we elegantly refine the
processes to account for computation-limited IoT
devices during security operations. Rigorous security
analyses are conducted to guarantee the robustness of
the proposed cryptographic protocol. In addition, we
demonstrate a thorough performance evaluation, where an
IoT-based test-bed, i.e., the Raspberry PI, is
simulated as the underlying platform of the
implementation of our proposed cryptographic protocol.
The results show the practicability of the proposed
protocol.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sha:2019:CED,
  author =       "Le-Tian Sha and Fu Xiao and Hai-Ping Huang and Yu Chen
                  and Ru-Chuan Wang",
  title =        "Catching Escapers: a Detection Method for Advanced
                  Persistent Escapers in Industry {Internet of Things}
                  Based on Identity-based Broadcast Encryption {(IBBE)}",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "29:1--29:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3319615",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                  https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                  https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3319615",
  abstract =     "As the Industry 4.0 or Internet of Things (IoT) era
                  begins, security plays a key role in the Industry
                  Internet of Things (IIoT) due to various threats, which
                  include escape or Distributed Denial of Service (DDoS)
                  attackers in the virtualization layer and vulnerability
                  exploiters in the device layer. A successful cross-VM
                  escape attack in the virtualization layer combined with
                  cross-layer penetration in the device layer, which we
                  define as an Advanced Persistent Escaper (APE), poses a
                  great threat. Therefore, the development of detection
                  and rejection methods for APEs across multiple layers
                  in IIoT is an open issue. To the best of our knowledge,
                  less effective methods are established, especially for
                  vulnerability exploitation in the virtualization layer
                  and backdoor leverage in the device layer. On the basis
                  of this, we propose Escaper Cops (EscaperCOP), a
                  detection method for cross-VM escapers in the
                  virtualization layer and cross-layer penetrators in the
                  device layer. In particular, a new detection method for
                  guest-to-host escapers is proposed for the
                  virtualization layer. Finally, a novel encryption
                  method based on Identity-based Broadcast Encryption
                  (IBBE) is proposed to protect the critical components
                  in EscaperCOP, detection library, and control command
                  library. To verify our method, experimental tests are
                  performed for a large number of APEs in an IIoT
                  framework. The test results have demonstrated the
                  proposed method is effective with an acceptable level
                  of detection ratio.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ahmed:2019:OPM,
  author =       "Rehan Ahmed and Bernhard Buchli and Stefan Draskovic
                  and Lukas Sigrist and Pratyush Kumar and Lothar
                  Thiele",
  title =        "Optimal Power Management with Guaranteed Minimum
                  Energy Utilization for Solar Energy Harvesting
                  Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "30:1--30:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3317679",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3317679",
  abstract =     "In this work, we present a formal study on optimizing
                  the energy consumption of energy harvesting embedded
                  systems. To deal with the uncertainty inherent in solar
                  energy harvesting systems, we propose the Stochastic
                  Power Management (SPM) scheme, which builds statistical
                  models of harvested energy based on historical data.
                  The proposed stochastic scheme maximizes the lowest
                  energy consumption across all time intervals while
                  giving strict probabilistic guarantees on not
                  encountering battery depletion. For situations where
                  historical data is not available, we propose the use of
                  (i) a Finite Horizon Control (FHC) scheme and (ii) a
                  non-uniformly scaled energy estimator based on an
                  astronomical model, which is used by FHC. Under certain
                  realistic assumptions, the FHC scheme can provide
                  guarantees on minimum energy usage that can be
                  supported over all times. We further propose and
                  evaluate a piece-wise linear approximation of FHC for
                  efficient implementation in resource-constrained
                  embedded systems. With extensive experimental
                  evaluation for eight publicly available datasets and
                  two datasets collected with our own deployments, we
                  quantitatively establish that the proposed solutions
                  are highly effective at providing a guaranteed minimum
                  service level and significantly outperform existing
                  solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Shukla:2019:EAR,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Adversaries and Robustness",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "30e:1--30e:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3345556",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3345556",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2019:CDM,
  author =       "Daibo Liu and Zhichao Cao and Mingyan Liu and Mengshu
                  Hou and Hongbo Jiang",
  title =        "Contention-Detectable Mechanism for Receiver-Initiated
                  {MAC}",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "31:1--31:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3317683",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3317683",
  abstract =     "The energy efficiency and delivery robustness are two
                  critical issues for low duty-cycled wireless sensor
                  networks. The asynchronous receiver-initiated
                  duty-cycling media access control (MAC) protocols have
                  shown their effectiveness through various studies. In
                  receiver-initiated MACs, packet transmission is
                  triggered by the probe of receiver. However, it suffers
                  from the performance degradation incurred by packet
                  collision, especially under bursty traffic. Several
                  protocols have been proposed to address this problem,
                  but their performance is restricted by the unnecessary
                  backoff time and long negotiation process. In this
                  article, we present CD-MAC, an energy-efficient and
                  robust contention-detectable mechanism for addressing
                  the collision-catching problem in receiver-initiated
                  MACs. By exploring the temporal diversity of the
                  acknowledgments, a receiver recognizes the potential
                  senders and subsequently polls individual senders one
                  by one. On that basis, CD-MAC can successfully avoid
                  packet collision even though multiple senders have data
                  packets to transmit to the same receiver. We implement
                  CD-MAC in TinyOS and evaluate its performance on an
                  indoor testbed with single-hop and multi-hop network
                  scenarios. The results show that CD-MAC can
                  significantly improve throughput by 1.72 times compared
                  with the state-of-the-art receiver-initiated MAC
                  protocol under bursty traffic loads. The results also
                  demonstrate that CD-MAC can effectively mitigate the
                  influence of hidden terminal problem and adapt to
                  network dynamics well.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2019:NNA,
  author =       "Xiaokang Wang and Laurence T. Yang and Hongguo Li and
                  Man Lin and Jianjun Han and Bernady O. Apduhan",
  title =        "{NQA}: a Nested Anti-collision Algorithm for {RFID}
                  Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "32:1--32:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3330139",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3330139",
  abstract =     "Radio frequency identification (RFID) systems, as one
                  of the key components in the Internet of Things (IoT),
                  have attracted much attention in the domains of
                  industry and academia. In practice, the performance of
                  RFID systems rather relies on the effectiveness and
                  efficiency of anti-collision algorithms. A large body
                  of studies have recently focused on the anti-collision
                  algorithms, such as the Q-algorithm (QA), which has
                  been successfully utilized in EPCglobal Class-1
                  Generation-2 protocol. However, the performance of
                  those anti-collision algorithms needs to be further
                  improved. Observe that fully exploiting the
                  pre-processing time can improve the efficiency of the
                  QA algorithm. With an objective of improving the
                  performance for anti-collision, we propose a Nested
                  Q-algorithm (NQA), which makes full use of such
                  pre-processing time and incorporates the advantages of
                  both Binary Tree (BT) algorithm and QA algorithm.
                  Specifically, based on the expected number of collision
                  tags, the NQA algorithm can adaptively select either BT
                  or QA to identify collision tags. Extensive simulation
                  results validate the efficiency and effectiveness of
                  our proposed NQA (i.e., less running time for
                  processing the same number of active tags) when
                  compared to the existing algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Su:2019:TFR,
  author =       "Fang Su and Yongpan Liu and Xiao Sheng and Hyung Gyu
                  Lee and Naehyuck Chang and Huazhong Yang",
  title =        "A Task Failure Rate Aware Dual-Channel Solar Power
                  System for Nonvolatile Sensor Nodes",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "33:1--33:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3320270",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3320270",
  abstract =     "In line with the rapid development of the Internet of
                  Things (IoT), the maintenance of on-board batteries for
                  a trillion sensor nodes has become prohibitive both in
                  time and costs. Energy harvesting is a promising
                  solution to this problem. However, conventional
                  energy-harvesting systems with storage suffer from low
                  efficiency because of conversion loss and storage
                  leakage. Direct supply systems without energy buffer
                  provide higher efficiency, but fail to satisfy quality
                  of service (QoS) due to mismatches between input power
                  and workloads. Recently, a novel dual-channel
                  photovoltaic power system has paved the way to achieve
                  both high energy efficiency and QoS guarantee. This
                  article focuses on the design-time and run-time
                  co-optimization of the dual-channel solar power system.
                  At the design stage, we develop a task failure rate
                  estimation framework to balance design costs and
                  failure rate. At run-time, we propose a task failure
                  rate aware QoS tuning algorithm to further enhance
                  energy efficiency. Through the experiments on both a
                  simulation platform and a prototype board, this study
                  demonstrates a 27\% task failure rate reduction
                  compared with conventional architectures with identical
                  design costs. And the proposed online QoS tuning
                  algorithm brings up to 30\% improvement in energy
                  efficiency with nearly zero failure rate penalty.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ponugoti:2019:EFH,
  author =       "Mounika Ponugoti and Aleksandar Milenkovic",
  title =        "Enabling On-the-Fly Hardware Tracing of Data Reads in
                  Multicores",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "34:1--34:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3322642",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3322642",
  abstract =     "Software debugging is one of the most challenging
                  aspects of embedded system development due to growing
                  hardware and software complexity, limited visibility of
                  system components, and tightening time-to-market. To
                  find software bugs faster, developers often rely on
                  on-chip trace modules with large buffers to capture
                  program execution traces with minimum interference with
                  program execution. However, the high volumes of trace
                  data and the high cost of trace modules limit the
                  visibility into the system operation to short program
                  segments. This article introduces a new
                  hardware/software technique for capturing and filtering
                  read data value traces in multicores that enables a
                  complete reconstruction of parallel program execution.
                  The proposed technique exploits tracking of data reads
                  in data caches and cache coherence protocol states to
                  minimize the number of trace messages streamed out of
                  the target platform to the software debugger. The
                  effectiveness of the proposed technique is determined
                  by analyzing the required trace port bandwidth and
                  trace buffer sizes as a function of the data cache size
                  and the number of processor cores. The results show
                  that the proposed technique significantly reduces the
                  required trace port bandwidth, from 12.2 to 73.9 times,
                  when compared to the Nexus-like read data value
                  tracing, thus enabling continuous on-the-fly data
                  tracing at modest hardware cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Al-bayati:2019:PSD,
  author =       "Zaid Al-bayati and Youcheng Sun and Haibo Zeng and
                  Marco {Di Natale} and Qi Zhu and Brett H. Meyer",
  title =        "Partitioning and Selection of Data Consistency
                  Mechanisms for Multicore Real-Time Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "35:1--35:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3320271",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3320271",
  abstract =     "Multicore platforms are becoming increasingly popular
                  in real-time systems. One of the major challenges in
                  designing multicore real-time systems is ensuring
                  consistent and timely access to shared resources.
                  Lock-based protection mechanisms such as MPCP and MSRP
                  have been proposed to guarantee mutually exclusive
                  access in multicore systems at the expense of blocking.
                  In this article, we consider partitioning and
                  scheduling in multicore real-time systems with resource
                  sharing. We first propose a resource-aware task
                  partitioning algorithm for systems with lock-based
                  protection. Wait-free methods, which ensure consistent
                  access to shared memory resources with negligible
                  blocking at the expense of additional memory space, are
                  a suitable alternative when the shared resource is a
                  communication buffer. We propose several approaches to
                  solve the joint problem of task partitioning and the
                  selection of a data consistency mechanism (lock-based
                  or wait-free). The problem is first formulated as an
                  Integer Linear Programming (ILP). For large systems
                  where an ILP solution is not scalable, we propose two
                  heuristic algorithms. Experimental results compare the
                  effectiveness of the proposed approaches in finding
                  schedulable systems with low memory cost and show how
                  the use of wait-free methods can significantly improve
                  schedulability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Desirena-Lopez:2019:TAR,
  author =       "G. Desirena-L{\'o}pez and A. Ram{\'\i}rez-Trevi{\~n}o
                  and J. L. Briz and C. R. V{\'a}zquez and D.
                  G{\'o}mez-Guti{\'e}rrez",
  title =        "Thermal-aware Real-time Scheduling Using Timed
                  Continuous {Petri} Nets",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "36:1--36:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3322643",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3322643",
  abstract =     "We present a thermal-aware, hard real-time (HRT)
                  global scheduler for a multiprocessor system designed
                  upon three novel techniques. First, we present a
                  modeling methodology based on Timed Continuous Petri
                  nets (TCPN) that yields a complete state variable
                  model, including job arrivals, CPU usage, power, and
                  thermal behavior. The model is accurate and avoids the
                  calibration stage of RC thermal models. Second, based
                  on this model, a linear programming problem (LPP)
                  determines the existence of a feasible HRT
                  thermal-aware schedule. Last, a sliding-mode controller
                  and an online discretization algorithm implement the
                  global HRT scheduler, which is capable of managing
                  thermal constraints, context switching, migrations, and
                  disturbances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ruaro:2019:SAQ,
  author =       "Marcelo Ruaro and Axel Jantsch and Fernando Gehm
                  Moraes",
  title =        "Self-Adaptive {QoS} Management of Computation and
                  Communication Resources in Many-Core {SoCs}",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "37:1--37:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3328755",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3328755",
  abstract =     "Providing quality of service (QoS) for many-core
                  systems with dynamic application admission is
                  challenging due to the high amount of resources to
                  manage and the unpredictability of computation and
                  communication events. Related works propose a
                  self-adaptive QoS mechanism concerned either in
                  communication or computation resources, lacking,
                  however, a comprehensive QoS management of both.
                  Assuming a many-core system with QoS monitoring,
                  runtime circuit-switching establishment, task
                  migration, and a soft real-time task scheduler, this
                  work fills this gap by proposing a novel self-adaptive
                  QoS management. The contribution of this proposal comes
                  with the following features in the QoS management: (i)
                  comprehensiveness, by covering communication and
                  computation resources; (ii) online, adopting the ODA
                  (Observe, Decide, Act) runtime closed-loop adaptation;
                  and (iii) reactive and proactive decisions, by using a
                  dynamic application profile extraction technique, which
                  enables the QoS management to be aware of the profile
                  of running applications, allowing it to take proactive
                  decisions based on a prediction analysis. The proposed
                  QoS management adopts a decentralized organization by
                  partitioning the system in clusters, each one managed
                  by a dedicated processor, making the proposal scalable.
                  Results show that the proactive feature accurately
                  extracts the applications' profile, and can prevent
                  future QoS violations. The synergy of reactive and
                  proactive decisions was able to sustain QoS, reducing
                  the deadline miss rate by 99.5\% with a severe
                  disturbance in communication and computation levels,
                  and avoiding deadline misses up to 70\% of system
                  utilization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ali:2019:CCT,
  author =       "G. G. Md. Nawaz Ali and Md. Noor-A-Rahim and Md.
                  Ashiqur Rahman and Beshah Ayalew and Peter H. J. Chong
                  and Yong Liang Guan",
  title =        "Cooperative Cache Transfer-based On-demand Network
                  Coded Broadcast in Vehicular Networks",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "38:1--38:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329865",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3329865",
  abstract =     "Real-time traffic updates, safety and comfort driving,
                  infotainment, and so on, are some envisioned
                  applications in vehicular networks. Unlike traditional
                  broadcast, network-coding-assisted broadcast can
                  satisfy multiple vehicles with different data items in
                  a coded form. However, server side encoding requires
                  the prior knowledge about vehicles' cache information
                  for the successful decoding at the vehicles' sides. The
                  explicit cache upload from vehicles to Road Side Unit
                  (RSU) wastes upload bandwidth. In multi-RSU vehicular
                  networks, we propose a Cooperative Cache Transfer-based
                  On-demand Network Coded Broadcast called CCTCB. In the
                  proposed CCTCB approach, vehicles do not need to upload
                  their cache information to the server, rather the RSU
                  server learns the vehicles' cache intrinsically. We
                  derive a probabilistic model to analyze the coding
                  opportunity in the proposed cooperative cache transfer
                  mechanism incorporating vehicle mobility. The
                  comprehensive simulation results validate the
                  superiority of the proposed approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2019:OIW,
  author =       "Yu-Chieh Chen and Ching-Chih Chang and Ramesh Perumal
                  and Shih-Rung Yeh and Yen-Chung Chang and Hsin Chen",
  title =        "Optimization and Implementation of Wavelet-based
                  Algorithms for Detecting High-voltage Spindles in
                  Neuron Signals",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "39:1--39:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329864",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3329864",
  abstract =     "This article presents a microcontroller unit (MCU)
                  based simplified discrete wavelet transform (Sim-DWT)
                  algorithm that can detect high-voltage spindles (HVSs)
                  in local field potential (LFP) signals. The Sim-DWT
                  algorithm operates in an 8-bit MCU, 8MHz operating
                  clock and 16 sample points of buffers to detect HVSs
                  with a frequency range of 5-15Hz. The requirement of
                  only sixteen 8-bit sample points as the window length
                  for calculation and no need for a multiplier render the
                  Sim-DWT easy to implement in an MCU with limited
                  hardware resources. The Sim-DWT is applied in an 8-bit
                  MCU with 6mW power consumption (including IO ports) and
                  was tested for detecting LFP signals in vivo. The
                  design methods and the accuracy of three typical types
                  of mother wavelet functions (Haar, DB4, Morlet) in the
                  Sim-DWT were also tested and compared with those of a
                  PC-based system. The experimental results showed that
                  with appropriately designed cMW functions in the
                  Sim-DWT, HVSs could be detected more accurately than
                  they could be in PC-based software. The present study
                  indicates that the optimized HVS detector (Sim-DWT) can
                  be implemented in an 8-bit MCU with limited hardware
                  resources and is suitable to serve as the digital core
                  in a closed-loop deep brain stimulator microsystem in
                  the future.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Noltsis:2019:CLC,
  author =       "Michail Noltsis and Nikolaos Zambelis and Francky
                  Catthoor and Dimitrios Soudris",
  title =        "A Closed-Loop Controller to Ensure Performance and
                  Temperature Constraints for Dynamic Applications",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "40:1--40:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3343030",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3343030",
  abstract =     "To secure correct system operation, a plethora of
                  Reliability, Availability and Serviceability (RAS)
                  techniques have been deployed by circuit designers. RAS
                  mechanisms however, come with the cost of extra clock
                  cycles. In addition, a wide variety of dynamic
                  workloads and different input conditions often
                  constitute preemptive dependability techniques hard to
                  implement. To this end, we focus on a realistic case
                  study of a closed-loop controller that mitigates
                  performance variation with a reactive response. This
                  concept has been discussed but was only illustrated on
                  small benchmarks. In particular, the extension of the
                  approach to manage performance of dynamic workloads on
                  a target platform has not been shown earlier. We
                  compare our scheme against the version of a Linux CPU
                  frequency governor in terms of timing response and
                  energy consumption. Finally, we move forward and
                  suggest a new flavor of our controller to efficiently
                  manage processor temperature. Again, the concept is
                  illustrated with a realistic case study and compared to
                  a modern temperature manager.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Durrieu:2019:GAC,
  author =       "Guy Durrieu and Claire Pagetti",
  title =        "{GRec}: Automatic Computation of Reconfiguration
                  Graphs for Multi-core Platforms",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "41:1--41:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3350533",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3350533",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Zhu:2019:SEA,
  author =       "Siwen Zhu and Yi Tang and Junxiang Zheng and Yongzhi
                  Cao and Hanpin Wang and Yu Huang and Marian Margraf",
  title =        "Sample Essentiality and Its Application to Modeling
                  Attacks on Arbiter {PUFs}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "42:1--42:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3344148",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3344148",
  abstract =     "Physically Unclonable Functions (PUFs), as an
                  alternative hardware-based security method, have been
                  challenged by some modeling attacks. As is known to
                  all, samples are significant in modeling attacks on
                  PUFs, and thus, some efforts have been made to expand
                  sample sets therein to improve modeling attacks. A
                  closer examination, however, reveals that not all
                  samples contribute to modeling attacks equally.
                  Therefore, in this article, we introduce the concept of
                  sample essentiality for describing the contribution of
                  a sample in modeling attacks and point out that any
                  sample without sample essentiality cannot enhance some
                  modeling attacks on PUFs. As a by-product, we find
                  theoretically and empirically that the samples expanded
                  by the procedures proposed by Chatterjee et al. do not
                  satisfy our sample essentiality. Furthermore, we
                  propose the notion of essential sample sets for
                  datasets and discuss its basic properties. Finally, we
                  demonstrate that our results about sample essentiality
                  can be used to reduce samples efficiently and benefit
                  sample selection in modeling attacks on arbiter PUFs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Strobel:2019:PMA,
  author =       "Manuel Strobel and Martin Radetzki",
  title =        "Power-mode-aware Memory Subsystem Optimization for
                  Low-power System-on-Chip Design",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "43:1--43:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3356583",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3356583",
  abstract =     "The memory subsystem is increasingly subject to an
                  intensive energy minimization effort in embedded and
                  System-on-Chip development. While the main focus is
                  typically put on energy consumption reduction, there
                  are other optimization aspects that become more and
                  more relevant as well, e.g., peak power constraints or
                  time budgets. In this regard, the present article makes
                  the following contributions. Taking industrial-grade
                  information into account, different Static
                  Random-Access Memory (SRAM) power modes and their
                  characteristics are presented at first. Using this
                  information, a comprehensive optimization model with
                  the main intention of energy minimization is defined.
                  It is based on memory access statistics that represent
                  the embedded software of interest, which allows for
                  application-tailored improvements. Further, it
                  considers different power states of the memory
                  subsystem and enables the definition of peak power and
                  time corridor constraints. The presented two-stage
                  implementation of this optimization model allows the
                  handling of large design spaces. Clearly defined
                  interfaces facilitate the exchange of individual
                  workflow parts in a plug-and-play fashion and further
                  enable a neat integration of our optimization method
                  with existing hardware/software (HW/SW) codesign
                  synthesis flows. A general evaluation for different
                  technology nodes yields that the optimization potential
                  of memory low-power modes increases with advancing
                  miniaturization but also depends on the data footprint
                  of the embedded software. Experimental results for a
                  set of benchmark applications confirm these findings
                  and provide energy savings of up to 90\% and over 60\%
                  on average compared to a monolithic memory layout
                  without low-power modes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5), October 2019, article 44 (Distill-Net).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Motamedi:2019:DNA,
  author =       "Mohammad Motamedi and Felix A. Portillo and Daniel
                 Fong and Soheil Ghiasi",
  title =        "{Distill-Net}: Application-Specific Distillation of
                 Deep Convolutional Neural Networks for
                 Resource-Constrained {IoT} Platforms",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "44:1--44:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3360512",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3360512",
  abstract =     "Many Internet-of-Things (IoT) applications demand fast
                 and accurate understanding of a few key events in their
                 surrounding environment. Deep Convolutional Neural
                 Networks (CNNs) have emerged as an effective approach
                 to understand speech, images, and similar
                 high-dimensional data types. Algorithmic performance of
                 modern CNNs, however, fundamentally relies on learning
                 class-agnostic hierarchical features that only exist in
                 comprehensive training datasets with many classes. As a
                 result, fast inference using CNNs trained on such
                 datasets is prohibitive for most resource-constrained
                 IoT platforms. To bridge this gap, we present a
                 principled and practical methodology for distilling a
                 complex modern CNN that is trained to effectively
                 recognize many different classes of input data into an
                 application-dependent essential core that not only
                 recognizes the few classes of interest to the
                 application accurately but also runs efficiently on
                 platforms with limited resources. Experimental results
                 confirm that our approach strikes a favorable balance
                 between classification accuracy (application
                 constraint), inference efficiency (platform
                 constraint), and productive development of new
                 applications (business constraint).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5), October 2019, article 111 (response time analysis with
%%% fixed preemption points).
%%% NOTE(review): article number 111 is out of sequence with the
%%% neighbouring entries (44 before, 45 after) -- confirm against the
%%% publisher record.  The closing page ("??") and CODEN ("????")
%%% appear to be unfilled placeholders.
@Article{Zhou:2019:RTA,
  author =       "Quan Zhou and Guohui Li and Jianjun Li and Chenggang
                 Deng and Ling Yuan",
  title =        "Response Time Analysis for Tasks with Fixed Preemption
                 Points under Global Scheduling",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "111:1--111:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3360513",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3360513",
  abstract =     "As an effective method for detecting the
                 schedulability of real-time tasks on multiprocessor
                 platforms, Response time analysis (RTA) has been deeply
                 researched in recent decades. Most of the existing RTA
                 methods are designed for tasks that can be preempted at
                 any time. However, in some real-time systems, a task
                 may have some fixed preemption points (FPPs) that
                 divide its execution into a series of non-preemptive
                 regions (NPRs). In such environments, the task can only
                 be preempted at its FPPs, which makes existing RTA
                 methods for arbitrary preemption tasks not applicable.
                 In this article, we study the schedulability analysis
                 on tasks with FPPs under both global fixed-priority
                 (G-FP) scheduling and global earliest deadline first
                 (G-EDF) scheduling. First, based on the idea of
                 limiting the time interval between two consecutive
                 executions of an NPR, a novel RTA method for tasks with
                 FPPs under G-FP scheduling is proposed. Second, we
                 propose an effective RTA method for tasks with FPPs
                 under G-EDF scheduling. Finally, extensive simulations
                 are conducted and the results validate the
                 effectiveness of the proposed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 45 (TF-Net).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Yu:2019:TND,
  author =       "Jiecao Yu and Andrew Lukefahr and Reetuparna Das and
                 Scott Mahlke",
  title =        "{TF-Net}: Deploying Sub-Byte Deep Neural Networks on
                 Microcontrollers",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "45:1--45:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358189",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358189",
  abstract =     "Deep Neural Networks (DNNs) have become an essential
                 component of various applications. While today's DNNs
                 are mainly restricted to cloud services, network
                 connectivity, energy, and data privacy problems make it
                 important to support efficient DNN computation on
                 low-cost, low-power processors like microcontrollers.
                 However, due to the constrained computation resources,
                 it is challenging to execute large DNN models on
                 microcontrollers. Using sub-byte low-precision input
                 activations and weights is a typical method to reduce
                 DNN computation. But on byte-addressable
                 microcontrollers, the sub-byte computation is not well
                 supported. The sub-byte inputs and weights need to be
                 unpacked from bitstreams before computation, which
                 incurs significant computation and energy overhead. In
                 this paper, we propose the TF-Net pipeline to
                 efficiently deploy sub-byte DNNs on microcontrollers.
                 While TF-Net allows for a range of weight and input
                 precision, we find Ternary weights and Four-bit inputs
                 provide the optimal balance between model accuracy,
                 computation performance, and energy efficiency. TF-Net
                 first includes a training framework for sub-byte
                 low-precision DNN models. Two algorithms are then
                 introduced to accelerate the trained models. The first,
                 direct buffer convolution, amortizes unpacking overhead
                 by caching unpacked inputs. The second, packed sub-byte
                 multiply-accumulate, utilizes a single multiplication
                 instruction to perform multiple sub-byte
                 multiply-accumulate computations. To further accelerate
                 DNN computation, we propose two instructions,
                 Multiply-Shift-Accumulate and Unpack, to extend the
                 existing microcontroller instruction set. On the tested
                 networks, TF-Net can help improve the computation
                 performance and energy efficiency by $ 1.83 \times $
                 and $ 2.28 \times $ on average, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 46 (software-only energy
%%% reduction for video inference).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Goncalves:2019:AER,
  author =       "Larissa Rozales Gon{\c{c}}alves and Rafael F{\~a}o {De
                 Moura} and Luigi Carro",
  title =        "Aggressive Energy Reduction for Video Inference with
                 Software-only Strategies",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "46:1--46:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358174",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358174",
  abstract =     "In the past years, several works have proposed custom
                 hardware and software-based techniques for the
                 acceleration of Convolutional Neural Networks (CNNs).
                 Most of these works focus on saving computations by
                 changing the used precision or modifying frame
                 processing. To reach a more aggressive energy
                 reduction, in this paper we propose software-only
                 modifications to the CNNs inference process. Our
                 approach exploits the inherent locality in videos by
                 replacing entire frame computations with a movement
                 prediction algorithm. Furthermore, when a frame must be
                 processed, we avoid energy-demanding floating-point
                 operations, and at the same time reduce memory accesses
                 by employing look-up tables in place of the original
                 convolutions. Using the proposed approach, one can
                 reach significant energy gains of more than $ 25 \times
                 $ for security cameras, and $ 12 \times $ for moving
                 vehicles applications, with only small software
                 modifications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 47 (CompAct); also listed in the
%%% datacompression.bib source.
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
%%% NOTE(review): the abstract reads "prior work has proposed"; the
%%% noun was missing in the transcribed text -- verify the wording
%%% against the publisher abstract.
@Article{Zhang:2019:CCL,
  author =       "Jeff (Jun) Zhang and Parul Raj and Shuayb Zarar and
                 Amol Ambardekar and Siddharth Garg",
  title =        "{CompAct}: On-chip Compression of Activations for Low
                 Power Systolic Array Based {CNN} Acceleration",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "47:1--47:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358178",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358178",
  abstract =     "This paper addresses the design of systolic array (SA)
                 based convolutional neural network (CNN) accelerators
                 for mobile and embedded domains. On- and off-chip
                 memory accesses to the large activation inputs
                 (sometimes called feature maps) of CNN layers
                 contribute significantly to total energy consumption
                 for such accelerators; while prior work has proposed
                 off-chip compression, activations are still stored
                 on-chip in uncompressed form, requiring either large
                 on-chip activation buffers or slow and energy-hungry
                 off-chip accesses. In this paper, we propose CompAct, a
                 new architecture that enables on-chip compression of
                 activations for SA based CNN accelerators. CompAct is
                 built around several key ideas. First, CompAct
                 identifies an SA schedule that has nearly regular
                 access patterns, enabling the use of a modified
                 run-length coding scheme (RLC). Second, CompAct
                 improves compression ratio of the RLC scheme using
                 Sparse-RLC in later CNN layers and Lossy-RLC in earlier
                 layers. Finally, CompAct proposes look-ahead snoozing
                 that operates synergistically with RLC to reduce the
                 leakage energy of activation buffers. Based on detailed
                 synthesis results, we show that CompAct enables up to
                 62\% reduction in activation buffer energy, and 34\%
                 reduction in total chip energy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 48 (ECAx).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Castro-Godinez:2019:EBE,
  author =       "Jorge Castro-God{\'\i}nez and Muhammad Shafique and
                 J{\"o}rg Henkel",
  title =        "{ECAx}: Balancing Error Correction Costs in
                 Approximate Accelerators",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "48:1--48:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358179",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358179",
  abstract =     "Approximate computing has emerged as a design paradigm
                 amenable to error-tolerant applications. It enables
                 trading the quality of results for efficiency
                 improvement in terms of delay, power, and energy
                 consumption under user-provided tolerable quality
                 degradation. Approximate accelerators have been
                 proposed to expedite frequently executing code sections
                 of error-resilient applications while meeting a defined
                 quality level. However, these accelerators may produce
                 unacceptable errors at run time if the input data
                 changes or dynamic adjustments are made for a defined
                 output quality constraint. State-of-the-art approaches
                 in approximate computing address this issue by
                 correctly re-computing those accelerator invocations
                 that produce unacceptable errors; this is achieved by
                 using the host processor or an alternate exact
                 accelerator, which is activated on-demand.
                 Nevertheless, such approaches can nullify the benefits
                 of approximate computing, especially when input data
                 variations are high at run time and errors due to
                 approximations are above a tolerable threshold. As a
                 robust and general solution to this problem, we propose
                 ECAx, a novel methodology to explore low-overhead error
                 correction in approximate accelerators by selectively
                 correcting most significant errors, in terms of their
                 magnitude, without losing the gains of approximations.
                 We particularly consider the case of approximate
                 accelerators built with approximate functional units
                 such as approximate adders. Our novel methodology
                 reduces the required exact re-computations on the host
                 processor, achieving up to 20\% performance gain
                 compared to state-of-the-art approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 49 (ultra-low energy human
%%% activity recognition accelerator).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Bhat:2019:ULE,
  author =       "Ganapati Bhat and Yigit Tuncel and Sizhe An and Hyung
                 Gyu Lee and Umit Y. Ogras",
  title =        "An Ultra-Low Energy Human Activity Recognition
                 Accelerator for Wearable Health Applications",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "49:1--49:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358175",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358175",
  abstract =     "Human activity recognition (HAR) has recently received
                 significant attention due to its wide range of
                 applications in health and activity monitoring. The
                 nature of these applications requires mobile or
                 wearable devices with limited battery capacity. User
                 surveys show that charging requirement is one of the
                 leading reasons for abandoning these devices. Hence,
                 practical solutions must offer ultra-low power
                 capabilities that enable operation on harvested energy.
                 To address this need, we present the first fully
                 integrated custom hardware accelerator (HAR engine)
                 that consumes 22.4 $ \mu $J per operation using a
                 commercial 65 nm technology. We present a complete
                 solution that integrates all steps of HAR, i.e.,
                 reading the raw sensor data, generating features, and
                 activity classification using a deep neural network
                 (DNN). It achieves 95\% accuracy in recognizing 8
                 common human activities while providing three orders of
                 magnitude higher energy efficiency compared to existing
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 50 (CASCADE).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Wijerathne:2019:CHT,
  author =       "Dhananjaya Wijerathne and Zhaoying Li and Manupa
                 Karunarathne and Anuj Pathania and Tulika Mitra",
  title =        "{CASCADE}: High Throughput Data Streaming via
                 Decoupled Access-Execute {CGRA}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "50:1--50:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358177",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358177",
  abstract =     "A Coarse-Grained Reconfigurable Array (CGRA) is a
                 promising high-performance low-power accelerator for
                 compute-intensive loop kernels. While the mapping of
                 the computations on the CGRA is a well-studied problem,
                 bringing the data into the array at a high throughput
                 remains a challenge. A conventional CGRA design
                 involves on-array computations to generate memory
                 addresses for data access undermining the attainable
                 throughput. A decoupled access-execute architecture, on
                 the other hand, isolates the memory access from the
                 actual computations resulting in a significantly higher
                 throughput. We propose a novel decoupled access-execute
                 CGRA design called CASCADE with full architecture and
                 compiler support for high-throughput data streaming
                 from an on-chip multi-bank memory. CASCADE offloads the
                 address computations for the multi-bank data memory
                 access to a custom designed programmable hardware. An
                 end-to-end fully-automated compiler synchronizes the
                 conflict-free movement of data between the memory banks
                 and the CGRA. Experimental evaluations show on average
                 $ 3 \times $ performance benefit and $ 2.2 \times $
                 performance per watt improvement for CASCADE compared
                 to an iso-area conventional CGRA with a bigger
                 processing array in lieu of a dedicated hardware memory
                 address generation logic.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 51 (fairness of AXI bus
%%% arbitration on FPGA SoCs).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Restuccia:2019:YBA,
  author =       "Francesco Restuccia and Marco Pagani and Alessandro
                 Biondi and Mauro Marinoni and Giorgio Buttazzo",
  title =        "Is Your Bus Arbiter Really Fair? {Restoring} Fairness
                 in {AXI} Interconnects for {FPGA SoCs}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "51:1--51:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358183",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358183",
  abstract =     "AMBA AXI is a popular bus protocol that is widely
                 adopted as the medium to exchange data in
                 field-programmable gate array system-on-chips (FPGA
                 SoCs). The AXI protocol does not specify how
                 conflicting transactions are arbitrated and hence the
                 design of bus arbiters is left to the vendors that
                 adopt AXI. Typically, a round-robin arbitration is
                 implemented to ensure a fair access to the bus by the
                 master nodes, as for the popular SoCs by Xilinx. This
                 paper addresses a critical issue that can arise when
                 adopting the AXI protocol under round-robin
                 arbitration; specifically, in the presence of bus
                 transactions with heterogeneous burst sizes. First, it
                 is shown that a completely unfair bandwidth
                 distribution can be achieved under some configurations,
                 making possible to arbitrarily decrease the bus
                 bandwidth of a target master node. This issue poses
                 serious performance, safety, and security concerns.
                 Second, a low-latency (one clock cycle) module named
                 AXI burst equalizer (ABE) is proposed to restore
                 fairness. Our investigations and proposals are
                 supported by implementations and tests upon three
                 modern SoCs. Experimental results are reported to
                 confirm the existence of the issue and assess the
                 effectiveness of the ABE with bus traffic generators
                 and hardware accelerators from the Xilinx's IP
                 library.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 52 (analytical NoC performance
%%% models with priority traffic classes).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Mandal:2019:APM,
  author =       "Sumit K. Mandal and Raid Ayoub and Michael Kishinevsky
                 and Umit Y. Ogras",
  title =        "Analytical Performance Models for {NoCs} with Multiple
                 Priority Traffic Classes",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "52:1--52:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358176",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358176",
  abstract =     "Networks-on-chip (NoCs) have become the standard for
                 interconnect solutions in industrial designs ranging
                 from client CPUs to many-core chip-multiprocessors.
                 Since NoCs play a vital role in system performance and
                 power consumption, pre-silicon evaluation environments
                 include cycle-accurate NoC simulators. Long simulations
                 increase the execution time of evaluation frameworks,
                 which are already notoriously slow, and prohibit
                 design-space exploration. Existing analytical NoC
                 models, which assume fair arbitration, cannot replace
                 these simulations since industrial NoCs typically
                 employ priority schedulers and multiple priority
                 classes. To address this limitation, we propose a
                 systematic approach to construct priority-aware
                 analytical performance models using micro-architecture
                 specifications and input traffic. Our approach
                 decomposes the given NoC into individual queues with
                 modified service time to enable accurate and scalable
                 latency computations. Specifically, we introduce novel
                 transformations along with an algorithm that
                 iteratively applies these transformations to decompose
                 the queuing system. Experimental evaluations using real
                 architectures and applications show high accuracy of
                 97\% and up to $ 2.5 \times $ speedup in full-system
                 simulation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 53 (PALP: partition-level
%%% parallelism in phase change memories).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Song:2019:EEP,
  author =       "Shihao Song and Anup Das and Onur Mutlu and Nagarajan
                 Kandasamy",
  title =        "Enabling and Exploiting Partition-Level Parallelism
                 {(PALP)} in Phase Change Memories",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "53:1--53:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358180",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358180",
  abstract =     "Phase-change memory (PCM) devices have multiple banks
                 to serve memory requests in parallel. Unfortunately, if
                 two requests go to the same bank, they have to be
                 served one after another, leading to lower system
                 performance. We observe that a modern PCM bank is
                 implemented as a collection of partitions that operate
                 mostly independently while sharing a few global
                 peripheral structures, which include the sense
                 amplifiers (to read) and the write drivers (to write).
                 Based on this observation, we propose PALP, a new
                 mechanism that enables partition-level parallelism
                 within each PCM bank, and exploits such parallelism by
                 using the memory controller's access scheduling
                 decisions. PALP consists of three new contributions.
                 First, we introduce new PCM commands to enable
                 parallelism in a bank's partitions in order to resolve
                 the read-write bank conflicts, with no changes needed
                 to PCM logic or its interface. Second, we propose
                 simple circuit modifications that introduce a new
                 operating mode for the write drivers, in addition to
                 their default mode of serving write requests. When
                 configured in this new mode, the write drivers can
                 resolve the read-read bank conflicts, working jointly
                 with the sense amplifiers. Finally, we propose a new
                 access scheduling mechanism in PCM that improves
                 performance by prioritizing those requests that exploit
                 partition-level parallelism over other requests,
                 including the long outstanding ones. While doing so,
                 the memory controller also guarantees
                 starvation-freedom and the PCM's
                 running-average-power-limit (RAPL). We evaluate PALP
                 with workloads from the MiBench and SPEC CPU2017
                 Benchmark suites. Our results show that PALP reduces
                 average PCM access latency by 23\%, and improves
                 average system performance by 28\% compared to the
                 state-of-the-art approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "53",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 54 (Synterface: pin-count
%%% minimization for flow-based microfluidic biochips).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
%%% NOTE(review): the abstract reads "biomolecular protocols", matching
%%% "biomolecular recognition" earlier in the same abstract; the
%%% transcribed text had "bimolecular" -- verify against the publisher
%%% abstract.
@Article{Sridhar:2019:SEC,
  author =       "Aditya Sridhar and Mohamed Ibrahim and Krishnendu
                 Chakrabarty",
  title =        "Synterface: Efficient Chip-to-World Interfacing for
                 Flow-Based Microfluidic Biochips Using Pin-Count
                 Minimization",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "54:1--54:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358188",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358188",
  abstract =     "Flow-based microfluidic biochips can be used to
                 perform bioassays by manipulating a large number of
                 on-chip valves. These biochips are increasingly used
                 today for biomolecular recognition, single-cell
                 screening, and point-of-care disease diagnostics, and
                 design-automation solutions for flow-based
                 microfluidics enable the mapping and optimization of
                 biomolecular protocols and software-based valve control.
                 However, a key problem that has not received adequate
                 attention is chip-to-world interfacing, which requires
                 the use of off-chip control equipment to provide
                 control signals for the on-chip valves. This problem is
                 exacerbated by the increase in the number of valves as
                 chips get more complex. To address the interfacing
                 problem, we present an efficient pin-count minimization
                 (synthesis) problem, referred to as Synterface, which
                 uses on-chip microfluidic logic gates and optimization
                 based on concepts from linear algebra. We present
                 results to show that Synterface significantly reduces
                 pin-count and simplifies the external interface for
                 flow-based microfluidics.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 55 (output-based intermediate
%%% representation for test-pattern program translation).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Kim:2019:OBI,
  author =       "Minsu Kim and Jeong-Keun Park and Sungyeol Kim and
                 Insu Yang and Hyunsoo Jung and Soo-Mook Moon",
  title =        "Output-based Intermediate Representation for
                 Translation of Test-pattern Program",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "55:1--55:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358186",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358186",
  abstract =     "An Intermediate Representation (IR) used by compilers
                 is normally generated statically, as a result of
                 parsing or analyzing the source program. This paper
                 proposes a completely different type of IR, generated
                 as a result of running the source program, the
                 output-based IR. There is a practical translation
                 problem where such an IR is useful, in the domain of
                 test-pattern programs. Test-pattern programs run on ATE
                 (automatic test equipment), a special embedded system
                 to test semiconductors such as DRAMs. They generate a
                 pattern for each clock, a bit vector input to the pins
                 of the chip. One issue is that different ATEs require
                 different programming since each ATE manufacturer has
                 its own programming language. Nonetheless, we should be
                 able to test a memory chip on different ATEs as long as
                 they generate the same patterns with the same speed.
                 Therefore, a memory chipmaker wants to make a pattern
                 program portable across ATEs, to fully utilize their
                 ATE resources. One solution is translating between
                 pattern programs, for which we need an IR since there
                 are multiple source ATEs and target ATEs. Instead of a
                 conventional, static IR, we propose using the output
                 pattern itself as an IR. Since the pattern is
                 independent of ATEs and easily obtainable, the
                 output-based IR obviates designing a static IR
                 considering all ATE programming languages and hardware
                 differences. Moreover, we might synthesize a better
                 target program from the IR, more optimized to the
                 target ATE. However, the full pattern generated by a
                 product-level pattern program is huge, so we propose
                 using an IR of abbreviated patterns, annotated with the
                 repetition information obtained while executing the
                 source program. Our experimental results with
                 product-level pattern programs show that our approach
                 is feasible.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 56 (READY); also listed in the
%%% multithreading.bib source.
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
%%% The speedup in the abstract is typeset as a math multiplier, the
%%% same form other abstracts in this file use for "N times" figures.
@Article{Silva:2019:RFG,
  author =       "Lucas Bragan{\c{c}}a {Da Silva} and Ricardo Ferreira
                 and Michael Canesche and Marcelo M. Menezes and Maria
                 D. Vieira and Jeronimo Penha and Peter Jamieson and
                 Jos{\'e} Augusto M. Nacif",
  title =        "{READY}: a Fine-Grained Multithreading Overlay
                 Framework for Modern {CPU--FPGA} Dataflow
                 Applications",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "56:1--56:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358187",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358187",
  abstract =     "In this work, we propose a framework called
                 REconfigurable Accelerator DeploY (READY), the first
                 framework to support polynomial runtime mapping of
                 dataflow applications in high-performance CPU-FPGA
                 platforms. READY introduces an efficient mapping with
                 fine-grained multithreading onto an overlay
                 architecture that hides the latency of a global
                 interconnection network. In addition to our overlay
                 architecture, we show how this system helps solve some
                 of the challenges for FPGA cloud computing adoption in
                 high-performance computing. The framework encapsulates
                 dataflow descriptions by using a target independent,
                 high-level API, and a dataflow model that allows for
                 explicit spatial and temporal parallelism. READY
                 directly maps the dataflow kernels onto the
                 accelerator. Our tool is flexible and extensible and
                 provides the infrastructure to explore different
                 accelerator designs. We validate READY on the Intel
                 Harp platform, and our experimental results show an
                 average $ 2 \times $ execution runtime improvement when
                 compared to an 8-thread multi-core processor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), October 2019, article 57 (learned optimization
%%% decisions for dynamic binary translation).
%%% The closing page ("??") and CODEN ("????") appear to be unfilled
%%% placeholders -- TODO confirm against the publisher record.
@Article{Park:2019:MOE,
  author =       "Sunghyun Park and Youfeng Wu and Janghaeng Lee and
                 Amir Aupov and Scott Mahlke",
  title =        "Multi-objective Exploration for Practical Optimization
                 Decisions in Binary Translation",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "57:1--57:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358185",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358185",
  abstract =     "In the design of mobile systems, hardware/software
                 (HW/SW) co-design has important advantages by creating
                 specialized hardware for the performance or power
                 optimizations. Dynamic binary translation (DBT) is a
                 key component in co-design. During the translation, a
                 dynamic optimizer in the DBT system applies various
                 software optimizations to improve the quality of the
                 translated code. With dynamic optimization,
                 optimization time is an exposed run-time overhead and
                 useful analyses are often restricted due to their high
                 costs. Thus, a dynamic optimizer needs to make smart
                 decisions with limited analysis information, which
                 complicates the design of optimization decision models
                 and often causes failures in human-made heuristics. In
                 mobile systems, this problem is even more challenging
                 because of strict constraints on computing capabilities
                 and memory size. To overcome the challenge, we
                 investigate an opportunity to build practical
                 optimization decision models for DBT by using machine
                 learning techniques. As the first step, loop unrolling
                 is chosen as the representative optimization. We base
                 our approach on the industrial strength DBT
                 infrastructure and conduct evaluation with 17,116
                 unrollable loops collected from 200 benchmarks and
                 real-life programs across various domains. By utilizing
                 all available features that are potentially important
                 for loop unrolling decision, we identify the best
                 classification algorithm for our infrastructure with
                 consideration for both prediction accuracy and cost.
                 The greedy feature selection algorithm is then applied
                 to the classification algorithm to distinguish its
                 significant features and cut down the feature space. By
                 maintaining significant features only, the best
                 affordable classifier, which satisfies the budgets
                 allocated to the decision process, shows 74.5\% of
                 prediction accuracy for the optimal unroll factor and
                 realizes an average 20.9\% reduction in dynamic
                 instruction count during the steady-state translated
                 code execution. For comparison, the best baseline
                 heuristic achieves 46.0\% prediction accuracy with an
                 average 13.6\% instruction count reduction. Given that
                 the infrastructure is already highly optimized and the
                 ideal upper bound for instruction reduction is observed
                 at 23.8\%, we believe this result is noteworthy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Durr:2019:EET,
  author =       "Marco D{\"u}rr and Georg von der Br{\"u}ggen and
                 Kuan-Hsun Chen and Jian-Jia Chen",
  title =        "End-to-End Timing Analysis of Sporadic Cause-Effect
                 Chains in Distributed Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "58:1--58:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358181",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358181",
  abstract =     "A cause-effect chain is used to define the logical
                 order of data dependent tasks, which is independent
                 from the execution order of the jobs of the
                 (periodic/sporadic) tasks. Analyzing the worst-case
                 End-to-End timing behavior, associated to a
                 cause-effect chain, is an important problem in embedded
                 control systems. For example, the detailed timing
                 properties of modern automotive systems are specified
                 in the AUTOSAR Timing Extensions. In this paper, we
                 present a formal End-to-End timing analysis for
                 distributed systems. We consider the two most important
                 End-to-End timing semantics, i.e., the button-to-action
                 delay (termed as the maximum reaction time) and the
                 worst-case data freshness (termed as the maximum data
                 age). Our contribution is significant due to the
                 consideration of the sporadic behavior of job
                 activations, whilst the results in the literature have
                 been mostly limited to periodic activations. The proof
                 strategy shows the (previously unexplored) connection
                 between the reaction time (data age, respectively) and
                 immediate forward (backward, respectively) job chains.
                 Our analytical results dominate the state of the art
                 for sporadic task activations in distributed systems
                 and the evaluations show a clear improvement for
                 synthesized task systems as well as for a real world
                 automotive benchmark setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Leipnitz:2019:HLS,
  author =       "Marcos T. Leipnitz and Gabriel L. Nazar",
  title =        "High-Level Synthesis of Approximate Designs under
                 Real-Time Constraints",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "59:1--59:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358182",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358182",
  abstract =     "The adoption of High-Level Synthesis (HLS) has
                 increased as the latest HLS tools have evolved to
                 provide high-quality results while improving
                 productivity and time-to-market. Concurrently, many
                 works have been proposing the incorporation of
                 approximate computing techniques within HLS toolchains,
                 allowing automated generation of inexact circuits for
                 error-tolerant application domains with the aim of
                 trading-off computation accuracy with area/power
                 savings or performance improvements. Thus, when
                 attempting to make a design meet timing requirements,
                 designers of real-time systems using HLS may resort to
                 approximation approaches. However, current approximate
                 HLS tools do not allow specifying real-time
                 constraints, being instead error-constrained to explore
                 area, power, or performance optimizations. In this
                 work, we propose an approximate HLS framework for
                 real-time systems that can be integrated with
                 state-of-the-art HLS tools. With this framework
                 designers can specify real-time constraints and satisfy
                 them while minimizing the output error. It uses
                 scheduling information and Worst-Case Execution Time
                 (WCET) analysis for iteratively exploring time-error
                 trade-offs of approximations in the time-critical
                 execution path. Experimental results on signal and
                 image processing benchmarks show that we can reduce the
                 WCET of exact designs by up to 35\% with acceptable
                 quality degradation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Saeed:2019:LDB,
  author =       "Samah Mohamed Saeed and Robert Wille and Ramesh
                 Karri",
  title =        "Locking the Design of Building Blocks for Quantum
                 Circuits",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "60:1--60:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358184",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358184",
  abstract =     "The research community expects that quantum computers
                 will give economical results for particular problems on
                 which the classical computers break down. Examples
                 include factoring of large numbers, searching in a big
                 database, or simulating chemical reactions to design
                 new drugs. Attempts are ongoing to build up a practical
                 quantum computer. Users (clients) can implement quantum
                 circuits to run on these quantum computers. However,
                 before running the quantum circuit on the quantum
                 computer, the users (clients) should compile, optimize,
                 decompose, and technology map the quantum circuit. In
                 the current embodiment, the resulting quantum circuit
                 runs on a remote and untrusted quantum computer server
                 --- introducing security risks. This study explores the
                 risk of outsourcing the quantum circuit to the quantum
                 computer by focusing on quantum oracles. Quantum
                 oracles are pivotal building blocks and require
                 specialized expertise and means to design. Hence, the
                 designer may protect this proprietary quantum oracle
                 intellectual property (IP) and hide his/her private
                 information. We investigate how to manage that on a
                 quantum computer server using the IBM project QX
                 quantum computer and Qiskit tools as an exemplar.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Mohanty:2019:SPE,
  author =       "Ram Prasad Mohanty and Hasindu Gamaarachchi and Andrew
                 Lambert and Sri Parameswaran",
  title =        "{SWARAM}: Portable Energy and Cost Efficient Embedded
                 System for Genomic Processing",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "61:1--61:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358211",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358211",
  abstract =     "Treatment of patients using high-quality precision
                 medicine requires a thorough understanding of the
                 genetic composition of a patient. Ideally, the
                 identification of unique variations in an individual's
                 genome is needed for specifying the necessary
                 treatment. Variant calling workflow is a pipeline of
                 tools, integrating state of the art software systems
                 aimed at alignment, sorting and variant calling for the
                 whole genome sequencing (WGS) data. This pipeline is
                 utilized for identifying unique variations in an
                 individual's genome (compared to a reference genome).
                 Currently, such a workflow is implemented on
                 high-performance computers (with additional GPUs or
                 FPGAs) or in cloud computers. Such systems are large,
                 have a high cost, and rely on the internet for genome
                 data transfer which makes the system unusable in remote
                 locations unequipped with internet connectivity. It
                 further raises privacy concerns due to processing being
                 carried out in a different facility. To overcome such
                 limitations, in this paper, for the first time, we
                 present a cost-efficient, offline, scalable, portable,
                 and energy-efficient computing system named SWARAM for
                 variant calling workflow processing. The system uses
                 novel architecture and algorithms to match against
                 partial reference genomes to exploit smaller memory
                 sizes which are typically available in tiny processing
                 systems. Extensive tests on a standard benchmark
                 data-set (NA12878 Illumina platinum genome) confirm
                 that the time consumed for the data transfer and
                 completing variant calling workflow on SWARAM was
                 competitive to that of a 32-core Intel Xeon server with
                 similar accuracy, but costs less than a fifth, and
                 consumes less than 40\% of the energy of the server
                 system. The original scripts and code we developed for
                 executing the variant calling workflow on SWARAM are
                 available in the associated Github repository
                 https://github.com/Rammohanty/swaram.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Kim:2019:AAI,
  author =       "Jihye Kim and Jiwon Lee and Hankyung Ko and Donghwan
                 Oh and Semin Han and Gwonho Jeong and Hyunok Oh",
  title =        "{AuthCropper}: Authenticated Image Cropper for Privacy
                 Preserving Surveillance Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "62:1--62:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358195",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358195",
  abstract =     "As surveillance systems are popular, the privacy of
                 the recorded video becomes more important. On the other
                 hand, the authenticity of video images should be
                 guaranteed when used as evidence in court. It is
                 challenging to satisfy both (personal) privacy and
                 authenticity of a video simultaneously, since the
                 privacy requires modifications (e.g., partial
                 deletions) of an original video image while the
                 authenticity does not allow any modifications of the
                 original image. This paper proposes a novel method to
                 convert an encryption scheme to support partial
                 decryption with a constant number of keys and construct
                 a privacy-aware authentication scheme by combining with
                 a signature scheme. The security of our proposed scheme
                 is implied by the security of the underlying encryption
                 and signature schemes. Experimental results show that
                 the proposed scheme can handle the UHD video stream
                 with more than 17 fps on a real embedded system, which
                 validates the practicality of the proposed scheme.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Fong:2019:ODS,
  author =       "Daniel D. Fong and Vivek J. Srinivasan and Kourosh
                 Vali and Soheil Ghiasi",
  title =        "Optode Design Space Exploration for Clinically-robust
                 Non-invasive Fetal Oximetry",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "63:1--63:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358207",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358207",
  abstract =     "Non-invasive transabdominal fetal oximetry (TFO) has
                 the potential to improve delivery outcomes by providing
                 physicians with an objective metric of fetal well-being
                 during labor. Fundamentally, the technology is based on
                 sending light through the maternal abdomen to
                 investigate deep fetal tissue, followed by detection
                 and processing of the light that returns (via
                 scattering) to the outside of the maternal abdomen. The
                 placement of the photodetector in relation to the light
                 source critically impacts TFO system performance,
                 including its operational robustness in the face of
                 fetal depth variation. However, anatomical differences
                 between pregnant women cause the fetal depths to vary
                 drastically, which further complicates the optical
                 probe (optode) design optimization. In this paper, we
                 present a methodology to solve this problem. We frame
                 optode design space exploration as a multi-objective
                 optimization problem, where hardware complexity (cost)
                 and performance across a wider patient population
                 (robustness) form competing objectives. We propose a
                 model-based approach to characterize the Pareto-optimal
                 points in the optode design space, through which a
                 specific design is selected. Experimental evaluation
                 via simulation and in vivo measurement on pregnant
                 sheep support the efficacy of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Siddhu:2019:PLA,
  author =       "Lokesh Siddhu and Preeti Ranjan Panda",
  title =        "{PredictNcool}: Leakage Aware Thermal Management for
                 {$3$D} Memories Using a Lightweight Temperature
                 Predictor",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "64:1--64:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358208",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358208",
  abstract =     "Recent research on mitigating thermal problems in 3D
                 memories has covered reactive strategies that reduce
                 memory power consumption, and thereby, performance,
                 when the memory temperature reaches the maximum
                 operating limit. Such techniques could benefit from
                 temperature prediction and avoid unnecessary
                 invocations and state transitions of the thermal
                 management strategy. We develop an accurate steady
                 state temperature predictor for thermal management of
                 3D memories. We utilize the symmetries in the
                 floorplan, along with other design insights, to reduce
                 the predictor's model parameters, making it lightweight
                 and suitable for runtime thermal management. Using the
                 temperature prediction, we introduce PredictNcool, a
                 proactive thermal management strategy to reduce
                 application runtime and memory energy. We compare
                 PredictNcool with two recent thermal management
                 strategies and our experiments show that the proposed
                 optimization results in performance improvements of
                 28\% and 5\%, and memory subsystem energy reductions of
                 38\% and 12\% (on average).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Ma:2019:RFD,
  author =       "Chenlin Ma and Zhaoyan Shen and Lei Han and Zili
                 Shao",
  title =        "{RMW-F}: a Design of {RMW-Free} Cache Using Built-in
                 {NAND-Flash} for {SMR} Storage",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "65:1--65:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358210",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358210",
  abstract =     "Shingled Magnetic Recording (SMR) disks have been
                 proposed as a high-density, non-volatile media and
                 precede traditional hard disk drives in both storing
                 capacity and cost. However, the intrinsic
                 characteristics of SMR disks raise a major performance
                 challenge named read-modify-write operations (RMWs)
                 that are time-consuming and can significantly degrade
                 the overall system performance. Current designs of SMR
                 disks usually adopt a persistent cache to alleviate the
                 negative effect brought by RMWs and the cache is used
                 as a first-level cache to buffer all the incoming
                 writes of the whole SMR storage system. In this paper,
                 we propose to change the functionality of the cache,
                 that is, the cache will no longer serve as a
                 first-level cache like previous. Incoming data are
                 distinguished according to their different write-back
                 behavior and those data which will incur RMWs will be
                 left in our built-in NAND flash cache called RMW-free
                 Cache (RMW-F) to eliminate the need of RMWs. Besides,
                 RMW-F improves the cleaning efficiency by a model that
                 takes both write-back cost and data popularity into
                 considerations. Our experimental results show that
                 RMW-F can achieve both system performance and cleaning
                 efficiency improvements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Liang:2019:ESW,
  author =       "Yu-Pei Liang and Tseng-Yi Chen and Yuan-Hao Chang and
                 Shuo-Han Chen and Kam-Yiu Lam and Wei-Hsin Li and
                 Wei-Kuan Shih",
  title =        "Enabling Sequential-write-constrained {B+}-tree Index
                 Scheme to Upgrade Shingled Magnetic Recording Storage
                 Performance",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "66:1--66:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358201",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358201",
  abstract =     "When a shingle magnetic recording (SMR) drive has been
                 widely applied to modern computer systems (e.g.,
                 archive file systems, big data computing systems, and
                 large-scale database systems), storage system
                 developers should thoroughly review whether current
                 designs (e.g., index schemes and data placements) are
                 appropriate for an SMR drive because of its sequential
                 write constraint. Through many prior works excellently
                 manage data in an SMR drive by integrating their
                 proposed solutions into the driver layer, an index
                 scheme over an SMR drive has never been optimized by
                 any previous works because managing index over the SMR
                 drive needs to jointly consider the properties of
                 B$^+$-tree and SMR natures (e.g., sequential write
                 constraint and zone partitions) in a host storage
                 system. Moreover, poor index management will result in
                 terrible storage performance because an index manager
                 is extensively used in file systems and database
                 applications. For optimizing the B$^+$-tree index
                 structure over an SMR storage, this work identifies
                 performance overheads caused by the B$^+$-tree index
                 structure in an SMR drive. By such observation, this
                 study proposes a sequential-write-constrained
                 B$^+$-tree index scheme, namely SW-B$^+$ tree, which
                 consists of an address redirection data structure, an
                 SMR-aware node allocation mechanism, and a
                 frequency-aware garbage collection strategy. According
                 to our experiments, the SW-B$^+$ tree can improve the
                 SMR storage performance 55\% on average.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Jiang:2019:ASL,
  author =       "Weiwen Jiang and Edwin H.-M. Sha and Xinyi Zhang and
                 Lei Yang and Qingfeng Zhuge and Yiyu Shi and Jingtong
                 Hu",
  title =        "Achieving Super-Linear Speedup across Multi-{FPGA} for
                 Real-Time {DNN} Inference",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "67:1--67:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358192",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358192",
  abstract =     "Real-time Deep Neural Network (DNN) inference with
                 low-latency requirement has become increasingly
                 important for numerous applications in both cloud
                 computing (e.g., Apple's Siri) and edge computing
                 (e.g., Google/Waymo's driverless car). FPGA-based DNN
                 accelerators have demonstrated both superior
                 flexibility and performance; in addition, for real-time
                 inference with low batch size, FPGA is expected to
                 achieve further performance improvement. However, the
                 performance gain from the single-FPGA design is
                 obstructed by the limited on-chip resource. In this
                 paper, we employ multiple FPGAs to cooperatively run
                 DNNs with the objective of achieving super-linear
                 speed-up against single-FPGA design. In implementing
                 such systems, we found two barriers that hinder us from
                 achieving the design goal: (1) the lack of a clear
                 partition scheme for each DNN layer to fully exploit
                 parallelism, and (2) the insufficient bandwidth between
                 the off-chip memory and the accelerator due to the
                 growing size of DNNs. To tackle these issues, we
                 propose a general framework, ``Super-LIP'', which can
                 support different kinds of DNNs. In this paper, we take
                 Convolutional Neural Network (CNN) as a vehicle to
                 illustrate Super-LIP. We first formulate an accurate
                 system-level model to support the exploration of best
                 partition schemes. Then, we develop a novel design
                 methodology to effectively alleviate the heavy loads on
                 memory bandwidth by moving traffic from memory bus to
                 inter-FPGA links. We implement Super-LIP based on
                 ZCU102 FPGA boards. Results demonstrate that Super-LIP
                 with 2 FPGAs can achieve $ 3.48 \times $ speedup,
                 compared to the state-of-the-art single-FPGA design.
                 What is more, as the number of FPGAs scales up, the
                 system latency can be further reduced while maintaining
                 high energy efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2019:ALA,
  author =       "Wei-Chen Wang and Yuan-Hao Chang and Tei-Wei Kuo and
                 Chien-Chung Ho and Yu-Ming Chang and Hung-Sheng Chang",
  title =        "Achieving Lossless Accuracy with Lossy Programming for
                 Efficient Neural-Network Training on {NVM}-Based
                 Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "68:1--68:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358191",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358191",
  abstract =     "Neural networks over conventional computing platforms
                 are heavily restricted by the data volume and
                 performance concerns. While non-volatile memory offers
                 potential solutions to data volume issues, challenges
                 must be faced over performance issues, especially with
                 asymmetric read and write performance. Beside that,
                 critical concerns over endurance must also be resolved
                 before non-volatile memory could be used in reality for
                 neural networks. This work addresses the performance
                 and endurance concerns altogether by proposing a
                 data-aware programming scheme. We propose to consider
                 neural network training jointly with respect to the
                 data-flow and data-content points of view. In
                 particular, methodologies with approximate results over
                 Dual-SET operations were presented. Encouraging results
                 were observed through a series of experiments, where
                 great efficiency and lifetime enhancement is seen
                 without sacrificing the result accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2019:DAS,
  author =       "Zhengguo Chen and Quan Deng and Nong Xiao and Kirk
                 Pruhs and Youtao Zhang",
  title =        "{DWMAcc}: Accelerating Shift-based {CNNs} with Domain
                 Wall Memories",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "69:1--69:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358199",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358199",
  abstract =     "PIM (processing-in-memory) based hardware accelerators
                 have shown great potentials in addressing the
                 computation and memory access intensity of modern CNNs
                 (convolutional neural networks). While adopting NVM
                 (non-volatile memory) helps to further mitigate the
                 storage and energy consumption overhead, adopting
                 quantization, e.g., shift-based quantization, helps to
                 tradeoff the computation overhead and the accuracy
                 loss, integrating both NVM and quantization in hardware
                 accelerators leads to sub-optimal acceleration. In this
                 paper, we exploit the natural shift property of DWM
                 (domain wall memory) to devise DWMAcc, a DWM-based
                 accelerator with asymmetrical storage of weight and
                 input data, to speed up the inference phase of
                 shift-based CNNs. DWMAcc supports flexible shift
                 operations to enable fast processing with low
                 performance and area overhead. We then optimize it with
                 zero-sharing, input-reuse, and weight-share schemes.
                 Our experimental results show that, on average, DWMAcc
                 achieves $ 16.6 \times $ performance improvement and $
                 85.6 \times $ energy consumption reduction over a
                 state-of-the-art SRAM based design.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Dave:2019:DEP,
  author =       "Shail Dave and Youngbin Kim and Sasikanth Avancha and
                 Kyoungwoo Lee and Aviral Shrivastava",
  title =        "{dMazeRunner}: Executing Perfectly Nested Loops on
                 Dataflow Accelerators",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "70:1--70:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358198",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358198",
  abstract =     "Dataflow accelerators feature simplicity,
                 programmability, and energy-efficiency and are
                 visualized as a promising architecture for accelerating
                 perfectly nested loops that dominate several important
                 applications, including image and media processing and
                 deep learning. Although numerous accelerator designs
                 are being proposed, how to discover the most efficient
                 way to execute the perfectly nested loop of an
                 application onto computational and memory resources of
                 a given dataflow accelerator (execution method) remains
                 an essential and yet unsolved challenge. In this paper,
                 we propose dMazeRunner --- to efficiently and
                 accurately explore the vast space of the different ways
                 to spatiotemporally execute a perfectly nested loop on
                 dataflow accelerators (execution methods). The novelty
                 of dMazeRunner framework is in: (i) a holistic
                 representation of the loop nests, that can succinctly
                 capture the various execution methods, (ii) accurate
                 energy and performance models that explicitly capture
                 the computation and communication patterns, data
                 movement, and data buffering of the different execution
                 methods, and (iii) drastic pruning of the vast search
                 space by discarding invalid solutions and the solutions
                 that lead to the same cost. Our experiments on various
                 convolution layers (perfectly nested loops) of popular
                 deep learning applications demonstrate that the
                 solutions discovered by dMazeRunner are on average $
                 9.16 \times $ better in Energy-Delay-Product (EDP) and
                 $ 5.83 \times $ better in execution time, as compared
                 to prior approaches. With additional pruning
                 heuristics, dMazeRunner reduces the search time from
                 days to seconds with a mere 2.56\% increase in EDP, as
                 compared to the optimal solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Spellini:2019:CDM,
  author =       "Stefano Spellini and Michele Lora and Franco Fummi and
                 Sudipta Chattopadhyay",
  title =        "Compositional Design of Multi-Robot Systems Control
                 Software on {ROS}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "71:1--71:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358197",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358197",
  abstract =     "This paper presents a methodology that relies on
                 Assume-Guarantee Contracts to decompose the problem of
                 synthesizing control software for a multi-robot system.
                 Initially, each contract describes either a component
                 (e.g., a robot) or an aspect of the system. Then, the
                 design problem is decomposed into different synthesis
                 and verification sub-problems, allowing to tackle the
                 complexity involved in the design process. The design
                 problem is then recomposed by exploiting the
                 rigorousness provided by contracts. This allows us to
                 achieve system-level simulation capable to be used for
                 validating the entire design. Once validated, the
                 software synthesized during the process can be
                 integrated into Robot Operating System (ROS) nodes and
                 executed using state-of-the-practice packages and tools
                 for modern robotic systems. We apply the methodology to
                 generate a control strategy for an autonomous goods
                 transportation system. Our results show a massive
                 reduction of the time required to obtain automatically
                 the control software implementing a multi-robot
                 mission.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Mendis:2019:ADU,
  author =       "Hashan Roshantha Mendis and Pi-Cheng Hsiu",
  title =        "Accumulative Display Updating for Intermittent
                 Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "72:1--72:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358190",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358190",
  abstract =     "Electrophoretic displays are ideal for self-powered
                 systems, but currently require an uninterrupted power
                 supply to carry out the full display update cycle.
                 Although sensible for battery-powered devices, when
                 directly applied to intermittently-powered systems,
                 guaranteeing display update atomicity usually results
                 in repeated execution until completion or can incur
                 high hardware/software overheads, heavy programmer
                 intervention and large energy buffering requirements to
                 provide sufficient display update energy. This paper
                 introduces the concept, design and implementation of
                 accumulative display updating, which relaxes the
                 atomicity constraints of display updating, such that
                 the display update process can be accumulatively
                 completed across power cycles, without the need for
                 sufficient energy for the entire display update. To
                 allow for process logical continuity, we track the
                 update progress during execution and facilitate a safe
                 display shutdown procedure to overcome physical and
                 operability issues related to abrupt power failure.
                 Additionally, a context-aware updating policy is
                 proposed to handle data freshness issues, where the
                 delay in addressing new update requests can cause the
                 display contents to be in conflict with new data
                 available. Experimental results on a Texas Instruments
                 device with an integrated electrophoretic display show
                 that, compared to atomic display updating, our design
                 can significantly increase accurate forward progress,
                 decrease the average response time of display updating
                 and reduce time and energy wastage when displaying
                 fresh data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}
@Article{Seyoum:2019:FFO,
author = "Biruk B. Seyoum and Alessandro Biondi and Giorgio C.
Buttazzo",
title = "{FLORA}: {FLoorplan} Optimizer for Reconfigurable
Areas in {FPGAs}",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "73:1--73:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358202",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358202",
abstract = "Floorplanning is a mandatory step in the design of
hardware accelerators for FPGA platforms, especially
when adopting dynamic partial reconfiguration (DPR).
This paper presents FLORA, an automated floorplanner
based on optimization via Mixed-Integer Linear
Programming (MILP). The floorplanning problem is solved
by means of a novel fine-grained modeling strategy of
FPGA resources. Furthermore, differently from other
proposals, our approach takes into account several
realistic Partial Reconfiguration (PR) floorplanning
constraints on FPGAs. FLORA was compared against
state-of-the-art floorplanners by means of benchmark
suites, showing that it is capable of providing better
performance in terms of resource consumption, maximum
inter-region wire-length, and running time required to
produce the solutions. Finally, FLORA was utilized to
generate placements for a partially-reconfigurable
video processing engine that was implemented on a
Xilinx Zynq-7020.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "73",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Moazzemi:2019:HFL,
author = "Kasra Moazzemi and Biswadip Maity and Saehanseul Yi
and Amir M. Rahmani and Nikil Dutt",
title = "{HESSLE-FREE}: Heterogeneous Systems Leveraging Fuzzy
Control for Runtime Resource Management",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "74:1--74:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358203",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358203",
abstract = "As computing platforms increasingly embrace
heterogeneity, runtime resource managers need to
efficiently, dynamically, and robustly manage shared
resources (e.g., cores, power budgets, memory
bandwidth). To address the complexities in
heterogeneous systems, state-of-the-art techniques that
use heuristics or machine learning have been proposed.
On the other hand, conventional control theory can be
used for formal guarantees, but may face unmanageable
complexity for modeling system dynamics of complex
heterogeneous systems. We address this challenge
through HESSLE-FREE (Heterogeneous Systems Leveraging
Fuzzy Control for Runtime Resource Management): an
approach leveraging fuzzy control theory that combines
the strengths of classical control theory together with
heuristics to form a light-weight, agile, and efficient
runtime resource manager for heterogeneous systems. We
demonstrate the efficacy of HESSLE-FREE executing on a
NVIDIA Jetson TX2 platform (containing a heterogeneous
multi-processor with a GPU) to show that HESSLE-FREE:
(1) provides opportunity for optimization in the
controller and stability analysis to enhance the
confidence in the reliability of the system; (2)
coordinates heterogeneous compute units to achieve
desired objectives (e.g., QoS, optimal power
references, FPS) efficiently and with lower complexity,
and (3) eases the burden of system specification.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Vashist:2019:UTS,
  author = {Abhishek Vashist and Andrew Keats and Sai Manoj
    Pudukotai Dinakarrao and Amlan Ganguly},
  title = {Unified Testing and Security Framework for Wireless
    Network-on-Chip Enabled Multi-Core Chips},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {75:1--75:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358212},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358212},
  abstract = {On-chip wireless interconnects have been demonstrated
    to improve the performance and energy consumption of
    data communication in Network-on-Chips (NoCs). However,
    the wireless interfaces (WIs) can be defective,
    rendering these broken links severely affect the
    performance. This makes manufacturing test of the WIs
    critical. While analog testing of the transceivers is
    possible, such methodologies are impractical in a
    Wireless NoC (WiNoC) due to large overheads. In
    addition to testing, security is another prominent
    challenge in WiNoCs, as the security breach can happen
    due to embedded hardware Trojans or through external
    attacker exploiting the wireless medium. The typical
    security measures used in general wireless networks are
    not practical in a WiNoC due to unique network
    architectures and performance requirements of such a
    system. However, both testing and security defense can
    potentially leverage a basic monitoring framework
    which, can detect malfunctions or anomalies. Based on
    this idea, we propose a unified architecture for
    testing and attack detection and protection of on-chip
    wireless interconnects. We adopt a Built-In-Self Test
    (BIST) methodology to enable online monitoring of the
    wireless interconnects which can also be reused for
    monitoring the security threats. We focus on
    manufacturing defects of the WIs for testing and
    persistent jamming attack for the security measures, as
    this kind of attack is most likely on wireless
    communication systems. The BIST methodology is capable
    of detecting faults in the wireless links with a low
    aliasing probability of $ 2.32 \times 10^{-10} $.
    Additionally, the proposed unified architecture is able
    to detect the persistent jamming with an accuracy of
    99.87\% and suffer $<$ 3\% communication bandwidth
    degradation even in the presence of attacks from either
    internal or external sources.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {75},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Dugo:2019:CLC,
  author = {Alexy Torres Aurora Dugo and Jean-Baptiste Lefoul and
    Felipe Gohring {De Magalhaes} and Dahman Assal and
    Gabriela Nicolescu},
  title = {Cache Locking Content Selection Algorithms for
    {ARINC-653} Compliant {RTOS}},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {76:1--76:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358196},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358196},
  abstract = {Avionic software is the subject of stringent real
    time, determinism and safety constraints. Software
    designers face several challenges, one of them being
    the interferences that appear in common situations,
    such as resource sharing. The interferences introduce
    non-determinism and delays in execution time. One of
    the main interference prone resources are cache
    memories. In single-core processors, caches comprise
    multiple private levels. This breaks the isolation
    principle imposed by avionic standards, such as the
    ARINC-653. This standard defines partitioned
    architectures where one partition should never directly
    interfere with another one. In cache-based
    architectures, one partition can modify the cache
    content of another partition. In this paper, we propose
    a method based on cache locking to reduce the
    non-determinism and the contention on lower level
    memories while improving the time performances.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {76},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Deshwal:2019:MMO,
  author = {Aryan Deshwal and Nitthilan Kanappan Jayakodi and
    Biresh Kumar Joardar and Janardhan Rao Doppa and Partha
    Pratim Pande},
  title = {{MOOS}: a Multi-Objective Design Space Exploration and
    Optimization Framework for {NoC} Enabled Manycore
    Systems},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {77:1--77:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358206},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358206},
  abstract = {The growing needs of emerging applications has posed
    significant challenges for the design of optimized
    manycore systems. Network-on-Chip (NoC) enables the
    integration of a large number of processing elements
    (PEs) in a single die. To design optimized manycore
    systems, we need to establish suitable trade-offs among
    multiple objectives including power, performance, and
    thermal. Therefore, we consider multi-objective design
    space exploration (MO-DSE) problems arising in the
    design of NoC-enabled manycore systems: placement of
    PEs and communication links to optimize two or more
    objectives (e.g., latency, energy, and throughput).
    Existing algorithms to solve MO-DSE problems suffer
    from scalability and accuracy challenges as size of the
    design space and the number of objectives grow. In this
    paper, we propose a novel framework referred as
    Multi-Objective Optimistic Search (MOOS) that performs
    adaptive design space exploration using a data-driven
    model to improve the speed and accuracy of
    multi-objective design optimization process. We apply
    MOOS to design both 3D heterogeneous and homogeneous
    manycore systems using Rodinia, PARSEC, and SPLASH2
    benchmark suites. We demonstrate that MOOS improves the
    speed of finding solutions compared to state-of-the-art
    methods by up to 13X while uncovering designs that are
    up to 20\% better in terms of NoC. The optimized 3D
    manycore systems improve the EDP up to 38\% when
    compared to 3D mesh-based designs optimized for the
    placement of PEs.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {77},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Smirnov:2019:IGM,
author = "Fedor Smirnov and Behnaz Pourmohseni and Michael
Gla{\ss} and J{\"u}rgen Teich",
title = "{IGOR}, Get Me the Optimum! {Prioritizing} Important
Design Decisions During the {DSE} of Embedded Systems",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "78:1--78:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358204",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358204",
abstract = "Design Space Exploration (DSE) techniques for complex
embedded systems must cope with a huge variety of
applications and target architectures as well as a wide
spectrum of objectives and constraints. In particular,
existing design automation approaches are either
problem-independent, in that they do not exploit any
knowledge about the optimization problem at hand, or
are tailored to specific a priori assumptions about the
problem and/or a specific set of design objectives.
While the latter are only applicable within a very
limited scope of design problems, the former may
struggle to deliver high-quality solutions for problems
with large design spaces and/or complex design
objectives. As a remedy, we propose Importance-Guided
Order Rearrangement (IGOR) as a novel approach for DSE
of embedded systems. Instead of relying on an a priori
problem knowledge, IGOR uses a
machine-learning-inspired technique to dynamically
analyze the importance of design decisions, i.e., the
impact that these decisions---within the specific
problem that is being optimized---have on the quality
of explored problem solutions w.r.t. the given design
objectives.
Throughout the DSE, IGOR uses this information to guide
the optimization towards the most promising regions of
the design space. Experimental results for a variety of
applications from different domains of embedded
computing and for different optimization scenarios give
evidence that the proposed approach is both scalable
and adaptable, as it can be used for the optimization
of systems described by several thousands constraints,
where it outperforms both problem-specific and
problem-independent optimization approaches and
achieves $ \epsilon $-dominance improvements of up to
95\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "78",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cheng:2019:AVE,
  author = {Zhongqi Cheng and Rainer D{\"o}mer},
  title = {Analyzing Variable Entanglement for Parallel
    Simulation of {SystemC TLM-2.0} Models},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {79:1--79:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358194},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358194},
  abstract = {The SystemC TLM-2.0 standard is widely used in modern
    electronic system level design for better
    interoperability and higher simulation speed. However,
    TLM-2.0 has been identified as an obstacle for parallel
    SystemC simulation due to the disappearance of
    channels. Without a containment construct, simulation
    threads are permitted to directly access data of other
    modules and that makes it difficult to synchronize such
    accesses as required by the SystemC execution
    semantics. In this paper, we propose a compile time
    approach to statically analyze potential conflicts
    among threads in SystemC TLM-2.0 loosely- and
    approximately-timed models. We introduce a new Socket
    Call Path technique which provides the compiler with
    socket binding information for precise static analysis.
    We also propose an algorithm to analyze entangled
    variable pairs. Experimental results show that our
    approach is able to support automatically safe parallel
    simulation of SystemC models with TLM-2.0 Blocking
    Transport Interface, Direct Memory Interface and
    Non-blocking Transport Interface, resulting in
    impressive simulation speeds.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {79},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Seo:2019:ETM,
  author = {Minjun Seo and Fadi Kurdahi},
  title = {Efficient Tracing Methodology Using Automata
    Processor},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {80:1--80:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358200},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358200},
  abstract = {Tracing or trace interface has been used in various
    ways to find system defects or bugs. As embedded
    systems are increasingly used in safety-critical
    applications, tracing can provide useful information
    during system execution at runtime. Non-intrusive
    tracing that does not affect system performance has
    become especially important, but unfortunately, the
    biggest obstacle to this approach was the vast amount
    of real-time trace data, making it challenging to
    address complex requirements with relatively limited
    hardware implementations. Automata processors can be
    programmed with a memory-like structure of automata and
    have a structure specific to streaming data, large
    capacity, and parallel processing functions. This paper
    promotes the idea of high-level system-on-chip
    monitoring using automata processors. We used a
    safety-critical pacemaker application in the
    experiments, described timed automata (TA)-based
    requirements, and tested intentionally injected 4,000
    random failures. The TA model converted for Automata
    Processor to monitor system, correctness, and safety
    properties achieved 100\% failure detection rate in the
    experiment, and the detected failure is reported as
    fast enough to allow enough extent for failure
    recovery.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {80},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Brais:2019:AAM,
  author = {Hadi Brais and Preeti Ranjan Panda},
  title = {{Alleria}: an Advanced Memory Access Profiling
    Framework},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {81:1--81:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358193},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358193},
  abstract = {Application analysis and simulation tools are used
    extensively by embedded system designers to improve
    existing optimization techniques or develop new ones.
    We propose the Alleria framework to make it easier for
    designers to comprehensively collect critical
    information such as virtual and physical memory
    addresses, accessed values, and thread schedules about
    one or more target applications. Such profilers often
    incur substantial performance overheads that are orders
    of magnitude larger than native execution time. We
    discuss how that overhead can be significantly reduced
    using a novel profiling mechanism called adaptive
    profiling. We develop a heuristic-based adaptive
    profiling mechanism and evaluate its performance using
    single-threaded and multi-threaded applications. The
    proposed technique can improve profiling throughput by
    up to 145\% and by 37\% on an average, enabling Alleria
    to be used to comprehensively profile applications with
    a throughput of over 3 million instructions per
    second.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {81},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Bhardwaj:2019:MCA,
  author = {Kartikeya Bhardwaj and Ching-Yi Lin and Anderson
    Sartor and Radu Marculescu},
  title = {Memory- and Communication-Aware Model Compression for
    Distributed Deep Learning Inference on {IoT}},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {82:1--82:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358205},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358205},
  abstract = {Model compression has emerged as an important area of
    research for deploying deep learning models on
    Internet-of-Things (IoT). However, for extremely
    memory-constrained scenarios, even the compressed
    models cannot fit within the memory of a single device
    and, as a result, must be distributed across multiple
    devices. This leads to a distributed inference paradigm
    in which memory and communication costs represent a
    major bottleneck. Yet, existing model compression
    techniques are not communication-aware. Therefore, we
    propose Network of Neural Networks (NoNN), a new
    distributed IoT learning paradigm that compresses a
    large pretrained `teacher' deep network into several
    disjoint and highly-compressed `student' modules,
    without loss of accuracy. Moreover, we propose a
    network science-based knowledge partitioning algorithm
    for the teacher model, and then train individual
    students on the resulting disjoint partitions.
    Extensive experimentation on five image classification
    datasets, for user-defined memory/performance budgets,
    show that NoNN achieves higher accuracy than several
    baselines and similar accuracy as the teacher model,
    while using minimal communication among students.
    Finally, as a case study, we deploy the proposed model
    for CIFAR-10 dataset on edge devices and demonstrate
    significant improvements in memory footprint (up to $
    24 \times $), performance (up to $ 12 \times $), and
    energy per node (up to $ 14 \times $) compared to the
    large teacher model. We further show that for
    distributed inference on multiple edge devices, our
    proposed NoNN model results in up to $ 33 \times $
    reduction in total latency w.r.t. a state-of-the-art
    model compression baseline.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {82},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Barijough:2019:QLA,
  author = {Kamyar Mirzazad Barijough and Zhuoran Zhao and Andreas
    Gerstlauer},
  title = {Quality\slash Latency-Aware Real-time Scheduling of
    Distributed Streaming {IoT} Applications},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {83:1--83:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358209},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358209},
  abstract = {Embedded systems are increasingly networked and
    distributed, often, such as in the Internet of Things
    (IoT), over open networks with potentially unbounded
    delays. A key challenge is the need for real-time
    guarantees over such inherently unreliable and
    unpredictable networks. Generally, timeouts are used to
    provide timing guarantees while trading off data losses
    and quality. The schedule of distributed task
    executions and network timeouts thereby determines a
    fundamental latency-quality trade-off that is, however,
    not taken into account by existing scheduling
    algorithms. In this paper, we propose an approach for
    scheduling of distributed, real-time streaming
    applications under quality-latency goals. We formulate
    this as a problem of analytically deriving a static
    worst-case schedule of a given distributed dataflow
    graph that minimizes quality loss while meeting
    guaranteed latency constraints. Towards this end, we
    first develop a quality model that estimates SNR of
    distributed streaming applications under given network
    characteristics and an overall linearity assumption.
    Using this quality model, we then formulate and solve
    the scheduling of distributed dataflow graphs as a
    numerical optimization problem. Simulation results with
    random graphs show that quality/latency-aware
    scheduling improves SNR over a baseline schedule by
    50\% on average. When applied to a distributed neural
    network application for handwritten digit recognition,
    our scheduling methodology can improve classification
    accuracy by 10\% over a naive distribution under tight
    latency constraints.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {83},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Wang:2019:DES,
  author = {Youchao Wang and Sam Willis and Vasileios Tsoutsouras
    and Phillip Stanley-Marbell},
  title = {Deriving Equations from Sensor Data Using Dimensional
    Function Synthesis},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {84:1--84:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358218},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358218},
  abstract = {We present a new method for deriving functions that
    model the relationship between multiple signals in a
    physical system. The method, which we call dimensional
    function synthesis, applies to data streams where the
    dimensions of the signals are known. The method
    comprises two phases: a compile-time synthesis phase
    and a subsequent calibration using sensor data. We
    implement dimensional function synthesis and use the
    implementation to demonstrate efficiently summarizing
    multi-modal sensor data for two physical systems using
    90 laboratory experiments and 10,000 synthetic
    idealized measurements. We evaluate the performance of
    the compile-time phase of dimensional function
    synthesis as well as the calibration phase overhead,
    inference latency, and accuracy of the models our
    method generates. The results show that our technique
    can generate models in less than 300 ms on average
    across all the physical systems we evaluated. When
    calibrated with sensor data, our models outperform
    traditional regression and neural network models in
    inference accuracy in all the cases we evaluated. In
    addition, our models perform better in training latency
    (over $ 8660 \times $ improvement) and required
    arithmetic operations in inference (over $ 34 \times $
    improvement). These significant gains are largely the
    result of exploiting information on the physics of
    signals that has hitherto been ignored.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {84},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Dai:2019:DMS,
  author = {Xiaotian Dai and Wanli Chang and Shuai Zhao and Alan
    Burns},
  title = {A Dual-Mode Strategy for Performance-Maximisation and
    Resource-Efficient {CPS} Design},
  journal = j-TECS,
  volume = {18},
  number = {5s},
  pages = {85:1--85:??},
  month = oct,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3358213},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3358213},
  abstract = {The emerging scenarios of cyber-physical systems
    (CPS), such as autonomous vehicles, require
    implementing complex functionality with limited
    resources, as well as high performances. This paper
    considers a common setup in which multiple control and
    non-control tasks share one processor, and proposes a
    dual-mode strategy. The control task switches between
    two sampling periods when rejecting (coping with) a
    disturbance. We create an optimisation framework
    looking for the switching sampling periods and time
    instants that maximise the control performance (indexed
    by settling time) and resource efficiency (indexed by
    the number of tasks that are schedulable on the
    processor). The latter objective is enabled with
    schedulability analysis tailored for the dual-mode
    model. Experimental results show that (i) given a set
    of tasks, the proposed strategy improves the control
    performances whilst retaining schedulability; and (ii)
    given requirements on the control performances, the
    proposed strategy is able to schedule more tasks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {85},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
%%% The second author's two-word family name is wrapped in braces so
%%% that BibTeX keeps it together as the Last part instead of taking
%%% only its final word as the surname.
@Article{Passerone:2019:CEC,
author = "Roberto Passerone and {\'I}{\~n}igo {{\'I}ncer Romeo}
and Alberto L. Sangiovanni-Vincentelli",
title = "Coherent Extension, Composition, and Merging Operators
in Contract Models for System Design",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "86:1--86:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358216",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358216",
abstract = "Contract models have been proposed to promote and
facilitate reuse and distributed development. In this
paper, we cast contract models into a coherent
formalism used to derive general results about the
properties of their operators. We study several
extensions of the basic model, including the
distinction between weak and strong assumptions and
maximality of the specification. We then analyze the
disjunction and conjunction operators, and show how
they can be broken up into a sequence of simpler
operations. This leads to the definition of a new
contract viewpoint merging operator, which better
captures the design intent in contrast to the more
traditional conjunction. The adjoint operation, which
we call separation, can be used to re-partition the
specification into different viewpoints. We show the
symmetries of these operations with respect to
composition and quotient.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bataineh:2019:EDL,
author = "Omar Bataineh and David S. Rosenblum and Mark
Reynolds",
title = "Efficient Decentralized {LTL} Monitoring Framework
Using Tableau Technique",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "87:1--87:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358219",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358219",
abstract = "This paper presents a novel framework for
decentralized monitoring of Linear Temporal Logic (LTL)
formulas, under the situation where processes are
synchronous and the formula is represented as a
tableau. The tableau technique allows one to construct
a semantic tree for the input LTL formula, which can be
used to optimize the decentralized monitoring of LTL in
various ways. Given a system $ P $ and an LTL formula $
\varphi $, we construct a tableau $ T_\varphi $. The
tableau $ T_\varphi $ is used for two purposes: (a) to
synthesize an efficient round-robin communication
policy for processes, and (b) to find the minimal ways
to decompose the formula and communicate observations
of processes in an efficient way. In our framework,
processes can propagate truth values of both atomic and
compound formulas (non-atomic formulas) depending on
the syntactic structure of the input LTL formula and
the observation power of processes. We demonstrate that
this approach of decentralized monitoring based on
tableau construction is more straightforward, more
flexible, and more likely to yield efficient solutions
than alternative approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Baumeister:2019:FSM,
  author          = "Jan Baumeister and Bernd Finkbeiner and Maximilian
                     Schwenger and Hazem Torfah",
  title           = "{FPGA} Stream-Monitoring of Real-time Properties",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "88:1--88:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358220",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358220",
  abstract        = "An essential part of cyber-physical systems is the
                     online evaluation of real-time data streams. Especially
                     in systems that are intrinsically safety-critical, a
                     dedicated monitoring component inspecting data streams
                     to detect problems at runtime greatly increases the
                     confidence in a safe execution. Such a monitor needs to
                     be based on a specification language capable of
                     expressing complex, high-level properties using only
                     the accessible low-level signals. Moreover, tight
                     constraints on computational resources exacerbate the
                     requirements on the monitor. Thus, several existing
                     approaches to monitoring are not applicable due to
                     their dependence on an operating system. We present an
                     FPGA-based monitoring approach by compiling an RTLola
                     specification into synthesizable VHDL code. RTLola is a
                     stream-based specification language capable of
                     expressing complex real-time properties while providing
                     an upper bound on the execution time and memory
                     requirements. The statically determined memory bound
                     allows for a compilation to an FPGA with a fixed size.
                     An advantage of FPGAs is a simple integration process
                     in existing systems and superb executing time. The
                     compilation results in a highly parallel implementation
                     thanks to the modular nature of RTLola specifications.
                     This further increases the maximal event rate the
                     monitor can handle.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "88",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Bajczi:2019:WMP,
  author          = "Levente Bajczi and Andr{\'a}s V{\"o}r{\"o}s and Vince
                     Moln{\'a}r",
  title           = "Will My Program Break on This Faulty Processor?:
                     {Formal} Analysis of Hardware Fault Activations in
                     Concurrent Embedded Software",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "89:1--89:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358238",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358238",
  abstract        = "Formal verification is approaching a point where it
                     will be reliably applicable to embedded software. Even
                     though formal verification can efficiently analyze
                     multi-threaded applications, multi-core processors are
                     often considered too dangerous to use in critical
                     systems, despite the many benefits they can offer. One
                     reason is the advanced memory consistency model of such
                     CPUs. Nowadays, most software verifiers assume strict
                     sequential consistency, which is also the na{\"\i}ve
                     view of programmers. Modern multi-core processors,
                     however, rarely guarantee this assumption by default.
                     In addition, complex processor architectures may easily
                     contain design faults. Thanks to the recent advances in
                     hardware verification, these faults are increasingly
                     visible and can be detected even in existing
                     processors, giving an opportunity to compensate for the
                     problem in software. In this paper, we propose a
                     generic approach to consider inconsistent behavior of
                     the hardware in the analysis of software. Our approach
                     is based on formal methods and can be used to detect
                     the activation of existing hardware faults on the
                     application level and facilitate their mitigation in
                     software. The approach relies heavily on recent results
                     of model checking and hardware verification and offers
                     new, integrative research directions. We propose a
                     partial solution based on existing model checking tools
                     to demonstrate feasibility and evaluate their
                     performance in this context.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "89",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2019:TAS,
  author          = "Youngmoon Lee and Kang G. Shin and Hoon Sung Chwa",
  title           = "Thermal-Aware Scheduling for Integrated {CPUs--GPU}
                     Platforms",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "90:1--90:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358235",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358235",
  abstract        = "As modern embedded systems like cars need high-power
                     integrated CPUs--GPU SoCs for various real-time
                     applications such as lane or pedestrian detection, they
                     face greater thermal problems than before, which may,
                     in turn, incur higher failure rate and cooling cost. We
                     demonstrate, via experimentation on a representative
                     CPUs--GPU platform, the importance of accounting for
                     two distinct thermal characteristics --- the platform's
                     temperature imbalance and different power dissipations
                     of different tasks --- in real-time scheduling to avoid
                     any burst of power dissipations while guaranteeing all
                     timing constraints. To achieve this goal, we propose a
                     new Real-Time Thermal-Aware Scheduling (RT-TAS)
                     framework. We first capture different CPU cores'
                     temperatures caused by different GPU power dissipations
                     (i.e., CPUs--GPU thermal coupling) with core-specific
                     thermal coupling coefficients. We then develop
                     thermally-balanced task-to-core assignment and
                     CPUs--GPU co-scheduling. The former addresses the
                     platform's temperature imbalance by efficiently
                     distributing the thermal load across cores while
                     preserving scheduling feasibility. Building on the
                     thermally-balanced task assignment, the latter
                     cooperatively schedules CPU and GPU computations to
                     avoid simultaneous peak power dissipations on both CPUs
                     and GPU, thus mitigating excessive temperature rises
                     while meeting task deadlines. We have implemented and
                     evaluated RT-TAS on an automotive embedded platform to
                     demonstrate its effectiveness in reducing the maximum
                     temperature by 6--12.2${}^\circ$C over existing
                     approaches without violating any task deadline.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "90",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2019:TAF,
  author          = "Peng Chen and Weichen Liu and Xu Jiang and Qingqiang
                     He and Nan Guan",
  title           = "Timing-Anomaly Free Dynamic Scheduling of Conditional
                     {DAG} Tasks on Multi-Core Systems",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "91:1--91:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358236",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358236",
  abstract        = "In this paper, we propose a novel approach to schedule
                     conditional DAG parallel tasks, with which we can
                     derive safe response time upper bounds significantly
                     better than the state-of-the-art counterparts. The main
                     idea is to eliminate the notorious timing anomaly in
                     scheduling parallel tasks by enforcing certain order
                     constraints among the vertices, and thus the response
                     time bound can be accurately predicted off-line by
                     somehow ``simulating'' the runtime scheduling. A key
                     challenge to apply the timing-anomaly free scheduling
                     approach to conditional DAG parallel tasks is that at
                     runtime it may generate exponentially many instances
                     from a conditional DAG structure. To deal with this
                     problem, we develop effective abstractions, based on
                     which a safe response time upper bound is computed in
                     polynomial time. We also develop algorithms to explore
                     the vertex orders to shorten the response time bound.
                     The effectiveness of the proposed approach is evaluated
                     by experiments with randomly generated DAG tasks with
                     different parameter configurations.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "91",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2019:SVH,
  author          = "Yu Wang and Mojtaba Zarei and Borzoo Bonakdarpour and
                     Miroslav Pajic",
  title           = "Statistical Verification of Hyperproperties for
                     Cyber-Physical Systems",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "92:1--92:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358232",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358232",
  abstract        = "Many important properties of cyber-physical systems
                     (CPS) are defined upon the relationship between
                     multiple executions simultaneously in continuous time.
                     Examples include probabilistic fairness and sensitivity
                     to modeling errors (i.e., parameters changes) for
                     real-valued signals. These requirements can only be
                     specified by hyperproperties. In this article, we focus
                     on verifying probabilistic hyperproperties for CPS. To
                     cover a wide range of modeling formalisms, we first
                     propose a general model of probabilistic uncertain
                     systems (PUSs) that unify commonly studied CPS models
                     such as continuous-time Markov chains (CTMCs) and
                     probabilistically parametrized Hybrid I/O Automata
                     (P$^2$HIOA). To formally specify hyperproperties, we
                     propose a new temporal logic, hyper probabilistic
                     signal temporal logic (HyperPSTL) that serves as a
                     hyper and probabilistic version of the conventional
                     signal temporal logic (STL). Considering the complexity
                     of real-world systems that can be captured as PUSs, we
                     adopt a statistical model checking (SMC) approach for
                     their verification. We develop a new SMC technique
                     based on the direct computation of significance levels
                     of statistical assertions for HyperPSTL specifications,
                     which requires no a priori knowledge on the
                     indifference margin. Then, we introduce SMC algorithms
                     for HyperPSTL specifications on the joint probabilistic
                     distribution of multiple paths, as well as
                     specifications with nested probabilistic operators
                     quantifying different paths, which cannot be handled by
                     existing SMC algorithms. Finally, we show the
                     effectiveness of our SMC algorithms on CPS benchmarks
                     with varying levels of complexity, including the Toyota
                     Powertrain Control System.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "92",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Luo:2019:PFC,
  author          = "Zhengxiong Luo and Feilong Zuo and Yu Jiang and Jian
                     Gao and Xun Jiao and Jiaguang Sun",
  title           = "{Polar}: Function Code Aware Fuzz Testing of {ICS}
                     Protocol",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "93:1--93:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358227",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358227",
  abstract        = "Industrial Control System (ICS) protocols are widely
                     used to build communications among system components.
                     Compared with common internet protocols, ICS protocols
                     have more control over remote devices by carrying a
                     specific field called ``function code'', which assigns
                     what the receive end should do. Therefore, it is of
                     vital importance to ensure their correctness. However,
                     traditional vulnerability detection techniques such as
                     fuzz testing are challenged by the increasing
                     complexity of these diverse ICS protocols. In this
                     paper, we present a function code aware fuzzing
                     framework --- Polar, which automatically extracts
                     semantic information from the ICS protocol and utilizes
                     this information to accelerate security vulnerability
                     detection. Based on static analysis and dynamic taint
                     analysis, Polar initiates the values of the function
                     code field and identifies some vulnerable operations.
                     Then, novel semantic aware mutation and selection
                     strategies are designed to optimize the fuzzing
                     procedure. For evaluation, we implement Polar on top of
                     two popular fuzzers --- AFL and AFLFast, and conduct
                     experiments on several widely used ICS protocols such
                     as Modbus, IEC104, and IEC 61850. Results show that,
                     compared with AFL and AFLFast, Polar achieves the same
                     code coverage and bug detection numbers at the speed of
                     1.5X--12X. It also gains increase with 0\%--91\% more
                     paths within 24 hours. Furthermore, Polar has exposed
                     10 previously unknown vulnerabilities in those
                     protocols, 6 of which have been assigned unique CVE
                     identifiers in the US National Vulnerability
                     Database.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "93",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Sun:2019:STC,
  author          = "Youcheng Sun and Xiaowei Huang and Daniel Kroening and
                     James Sharp and Matthew Hill and Rob Ashmore",
  title           = "Structural Test Coverage Criteria for Deep Neural
                     Networks",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "94:1--94:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358233",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358233",
  abstract        = "Deep neural networks (DNNs) have a wide range of
                     applications, and software employing them must be
                     thoroughly tested, especially in safety-critical
                     domains. However, traditional software test coverage
                     metrics cannot be applied directly to DNNs. In this
                     paper, inspired by the MC/DC coverage criterion, we
                     propose a family of four novel test coverage criteria
                     that are tailored to structural features of DNNs and
                     their semantics. We validate the criteria by
                     demonstrating that test inputs that are generated with
                     guidance by our proposed coverage criteria are able to
                     capture undesired behaviours in a DNN. Test cases are
                     generated using a symbolic approach and a
                     gradient-based heuristic search. By comparing them with
                     existing methods, we show that our criteria achieve a
                     balance between their ability to find bugs (proxied
                     using adversarial examples and correlation with
                     functional coverage) and the computational cost of test
                     input generation. Our experiments are conducted on
                     state-of-the-art DNNs obtained using popular open
                     source datasets, including MNIST, CIFAR-10 and
                     ImageNet.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "94",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Lin:2019:GBM,
  author          = "Yi-Ting Lin and Hsiang Hsu and Shang-Chien Lin and
                     Chung-Wei Lin and Iris Hui-Ru Jiang and Changliu Liu",
  title           = "Graph-Based Modeling, Scheduling, and Verification for
                     Intersection Management of Intelligent Vehicles",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "95:1--95:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358221",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358221",
  abstract        = "Intersection management is one of the most
                     representative applications of intelligent vehicles
                     with connected and autonomous functions. The
                     connectivity provides environmental information that a
                     single vehicle cannot sense, and the autonomy supports
                     precise vehicular control that a human driver cannot
                     achieve. Intersection management solves the fundamental
                     conflict resolution problem for vehicles --- two vehicles
                     should not appear at the same location at the same
                     time, and, if they intend to do that, an order should
                     be decided to optimize certain objectives such as the
                     traffic throughput or smoothness. In this paper, we
                     first propose a graph-based model for intersection
                     management. The model is general and applicable to
                     different granularities of intersections and other
                     conflicting scenarios. We then derive formal
                     verification approaches which can guarantee
                     deadlock-freeness. Based on the graph-based model and
                     the verification approaches, we develop a centralized
                     cycle removal algorithm for the graph-based model to
                     schedule vehicles to go through the intersection safely
                     (without collisions) and efficiently without deadlocks.
                     Experimental results demonstrate the expressiveness of
                     the proposed model and the effectiveness and efficiency
                     of the proposed algorithm.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "95",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Kyriakis:2019:SMR,
  author          = "Panagiotis Kyriakis and Jyotirmoy V. Deshmukh and Paul
                     Bogdan",
  title           = "Specification Mining and Robust Design under
                     Uncertainty: a Stochastic Temporal Logic Approach",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "96:1--96:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358231",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358231",
  abstract        = "In this paper, we propose Stochastic Temporal Logic
                     (StTL) as a formalism for expressing probabilistic
                     specifications on time-varying behaviors of controlled
                     stochastic dynamical systems. To make StTL a more
                     effective specification formalism, we introduce the
                     quantitative semantics for StTL to reason about the
                     robust satisfaction of an StTL specification by a given
                     system. Additionally, we propose using the robustness
                     value as the objective function to be maximized by a
                     stochastic optimization algorithm for the purpose of
                     controller design. Finally, we formulate an algorithm
                     for parameter inference for Parametric-StTL
                     specifications, which allows specifications to be mined
                     from output traces of the underlying system. We
                     demonstrate and validate our framework on two case
                     studies inspired by the automotive domain.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "96",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Ghosh:2019:RRS,
  author          = "Bineet Ghosh and Parasara Sridhar Duggirala",
  title           = "Robust Reachable Set: Accounting for Uncertainties in
                     Linear Dynamical Systems",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "97:1--97:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358229",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358229",
  abstract        = "Reachable set computation is one of the primary
                     techniques for safety verification of linear dynamical
                     systems. In reality the underlying dynamics have
                     uncertainties like parameter variations or modeling
                     uncertainties. Therefore, the reachable set computation
                     must consider the uncertainties in the dynamics to be
                     useful, i.e., the computed reachable set should be over
                     or under approximation if not exact. This paper
                     presents a technique to compute reachable set of linear
                     dynamical systems with uncertainties. First, we
                     introduce a construct called support of a matrix. Using
                     this construct, we present a set of sufficient
                     conditions for which reachable set for uncertain linear
                     system can be computed efficiently; and safety
                     verification can be performed using bi-linear
                     programming. Finally, given a linear dynamical system,
                     we compute robust reachable set, which accounts for all
                     possible uncertainties that can be handled by the
                     sufficient conditions presented. Experimental
                     evaluation on benchmarks reveals that our algorithm is
                     computationally very efficient.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "97",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Lal:2019:CGA,
  author          = "Ratan Lal and Pavithra Prabhakar",
  title           = "Counterexample Guided Abstraction Refinement for
                     Polyhedral Probabilistic Hybrid Systems",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "98:1--98:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358217",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/python.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358217",
  abstract        = "We consider the problem of safety analysis of
                     probabilistic hybrid systems, which capture discrete,
                     continuous and probabilistic behaviors. We present a
                     novel counterexample guided abstraction refinement
                     (CEGAR) algorithm for a subclass of probabilistic
                     hybrid systems, called polyhedral probabilistic hybrid
                     systems (PHS), where the continuous dynamics is
                     specified using a polyhedral set within which the
                     derivatives of the continuous executions lie.
                     Developing a CEGAR algorithm for PHS is complex owing
                     to the branching behavior due to the probabilistic
                     transitions, and the infinite state space due to the
                     real-valued variables. We present a practical algorithm
                     by choosing a succinct representation for
                     counterexamples, an efficient validation algorithm and
                     a constructive method for refinement that ensures
                     progress towards the elimination of a spurious abstract
                     counterexample. The technical details for refinement
                     are non-trivial since there are no clear disjoint sets
                     for separation. We have implemented our algorithm in a
                     Python toolbox called Procegar; our experimental
                     analysis demonstrates the benefits of our method in
                     terms of successful verification results, as well as
                     bug finding.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "98",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Duggirala:2019:ASR,
  author          = "Parasara Sridhar Duggirala and Stanley Bak",
  title           = "Aggregation Strategies in Reachable Set Computation of
                     Hybrid Systems",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "99:1--99:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358214",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358214",
  abstract        = "Computing the set of reachable states is a widely used
                     technique for proving that a hybrid system satisfies
                     its safety specification. Flow-pipe construction
                     methods interleave phases of computing continuous
                     successors and phases of computing discrete successors.
                     Directly doing this leads to a combinatorial explosion
                     problem, though, as with each discrete successor there
                     may be an interval of time where the transition can
                     occur, so that the number of paths becomes exponential
                     in the number of discrete transitions. For this reason,
                     most reachable set computation tools implement some
                     form of set aggregation for discrete transitions, such
                     as, performing a template-based overapproximation or
                     convex hull aggregation. These aggregation methods,
                     however, in theory can lead to unbounded error, and in
                     practice are often the root cause of why a safety
                     specification cannot be proven. This paper proposes
                     techniques for improving the accuracy of the
                     aggregation operations performed for reachable set
                     computation. First, we present two aggregation
                     strategies over generalized stars, namely convex hull
                     aggregation and template based aggregation. Second, we
                     perform adaptive deaggregation using a data structure
                     called Aggregated Directed Acyclic Graph (AGGDAG). Our
                     deaggregation strategy is driven by counterexamples and
                     hence has soundness and relative completeness
                     guarantees. We demonstrate the computational benefits
                     of our approach through two case studies involving
                     satellite rendezvous and gearbox meshing.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "99",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Salamati:2019:MEM,
  author          = "Mahmoud Salamati and Rocco Salvia and Eva Darulova and
                     Sadegh Soudjani and Rupak Majumdar",
  title           = "Memory-Efficient Mixed-Precision Implementations for
                     Robust Explicit Model Predictive Control",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "100:1--100:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358223",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358223",
  abstract        = "We propose an optimization for space-efficient
                     implementations of explicit model-predictive
                     controllers (MPC) for robust control of linear
                     time-invariant (LTI) systems on embedded platforms. We
                     obtain an explicit-form robust model-predictive
                     controller as a solution to a multi-parametric linear
                     programming problem. The structure of the controller is
                     a polyhedral decomposition of the control domain, with
                     an affine map for each domain. While explicit MPC is
                     suited for embedded devices with low computational
                     power, the memory requirements for such controllers can
                     be high. We provide an optimization algorithm for a
                     mixed-precision implementation of the controller, where
                     the deviation of the implemented controller from the
                     original one is within the robustness margin of the
                     robust control problem. The core of the mixed-precision
                     optimization is an iterative static analysis that
                     co-designs a robust controller and a low-bitwidth
                     approximation that is statically guaranteed to always
                     be within the robustness margin of the original
                     controller. We have implemented our algorithm and show
                     on a set of benchmarks that our optimization can reduce
                     space requirements by up to 20.9\% and on average by
                     12.6\% compared to a minimal uniform precision
                     implementation of the original controller.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "100",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Arrestier:2019:NRD,
  author          = "Florian Arrestier and Karol Desnos and Eduardo Juarez
                     and Daniel Menard",
  title           = "Numerical Representation of Directed Acyclic Graphs
                     for Efficient Dataflow Embedded Resource Allocation",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "101:1--101:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358225",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358225",
  abstract        = "Stream processing applications running on
                     Heterogeneous Multi-Processor Systems on Chips
                     (HMPSoCs) require efficient resource allocation and
                     management, both at compile-time and at runtime. To
                     cope with modern adaptive applications whose behavior
                     cannot be exhaustively predicted at compile-time,
                     runtime managers must be able to take resource
                     allocation decisions on-the-fly, with a minimum
                     overhead on application performance. Resource
                     allocation algorithms often rely on an internal
                     modeling of an application. Directed Acyclic Graphs
                     (DAGs) are the most commonly used models for capturing
                     control and data dependencies between tasks. DAGs are
                     notably often used as an intermediate representation
                     for deploying applications modeled with a dataflow
                     Model of Computation (MoC) on HMPSoCs. Building such
                     intermediate representation at runtime for massively
                     parallel applications is costly both in terms of
                     computation and memory overhead. In this paper, an
                     intermediate representation of DAGs for resource
                     allocation is presented. This new representation shows
                     improved performance for run-time analysis of dataflow
                     graphs with less overhead in both computation time and
                     memory footprint. The performances of the proposed
                     representation are evaluated on a set of computer
                     vision and machine learning applications.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "101",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
@Article{Ziegler:2019:HSE,
  author          = "Andreas Ziegler and Julian Geus and Bernhard Heinloth
                     and Timo H{\"o}nig and Daniel Lohmann",
  title           = "{Honey}, {I} Shrunk the {ELFs}: Lightweight Binary
                     Tailoring of Shared Libraries",
  journal         = j-TECS,
  volume          = "18",
  number          = "5s",
  pages           = "102:1--102:??",
  month           = oct,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358222",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Thu Oct 17 18:16:44 MDT 2019",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358222",
  abstract        = "In the embedded domain, industrial sectors (i.e.,
                     automotive industry, avionics) are undergoing radical
                     changes. They broadly adopt commodity hardware and move
                     away from special-purpose control units. During this
                     transition, heterogeneous software components are
                     consolidated to run on commodity operating systems. To
                     efficiently consolidate such components, a modular
                     encapsulation of common functionality into reusable
                     binary files (i.e., shared libraries) is essential.
                     However, shared libraries are often unnecessarily large
                     as they entail a lot of generic functionality that is
                     not required in a narrowly defined scenario. As the
                     source code of proprietary components is often
                     unavailable and the industry is heading towards
                     binary-only distribution, we propose an approach
                     towards lightweight binary tailoring. As demonstrated
                     in the evaluation, lightweight binary tailoring
                     effectively reduces the amount of code in all shared
                     libraries on a Linux-based system by 63 percent and
                     shrinks their files by 17 percent. The reduction in
                     size is beneficial to cut down costs (e.g., lower
                     storage and memory footprint) and eases code analyses
                     that are necessary for code audits.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "102",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), article 103 (Oct 2019).
%%% The final page is still the `??' placeholder; fill it in once the
%%% article's page count is known.
%%% NOTE(review): URL uses the ft_gateway.cfm form, unlike the doi/abs
%%% links on this file's 2020 entries -- confirm that it still resolves.
@Article{Pan:2019:MTP,
author = "Runyu Pan and Gabriel Parmer",
title = "{MxU}: Towards Predictable, Flexible, and Efficient
Memory Access Control for the Secure {IoT}",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "103:1--103:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358224",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358224",
abstract = "The advanced functionality requirements of modern
embedded and Internet of Things (IoT) devices --- from
autonomous vehicles, to city and power-grid management
--- are driving an ever-increasing software complexity.
At the same time, the pervasive internet connections of
these systems necessitate the fundamental design of
security into these devices. The isolation of complex
features from those that are critical through
protection domains is an effective means to constrain
the scope of faults and security breaches. Common
hardware-provided memory facilities to enforce
protection domains through memory access control ---
including Memory Management Units (MMUs) usually found
in microprocessors, and Memory Protection Units (MPUs)
usually found in microcontrollers --- must meet the
goals of enabling flexible, efficient and dynamic
management of memory, and must enable tight bounds on
the worst-case execution of critical code.
Unfortunately, current system memory management
facilities are ill-prepared to handle this challenge:
MMUs that use extensive caches to achieve strong
average-case performance suffer from debilitating
worst-case and even average-case behavior under hefty
interference, while MPUs struggle to provide flexible
memory management. This paper details MxU, a memory
protection and allocation abstraction that integrates
temporal specifications into the memory management
subsystem, to enable portable code to achieve both
predictable, tightly-bounded execution and dynamic
management across both MMU- and MPU-based systems. We
implement MxU in the Composite microkernel, and
evaluate its flexibility and predictability over two
different architectures: a MPU-based Cortex-M7
microcontroller and a MMU-based Cortex-A9
microprocessor using a suite of modern applications
including neural network-based inference, SQLite, and a
javascript runtime. For MMU-based systems, MxU reduces
application TLB stall by up to 68.0\%. For MPU-based
systems, MxU enables flexible dynamic memory management
often with application overheads of 1\%, increasing to
6.1\% under significant interference.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "103",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), article 104 (Oct 2019); bibsource also lists java2010.bib.
%%% The final page is still the `??' placeholder; fill it in once the
%%% article's page count is known.
%%% NOTE(review): URL uses the ft_gateway.cfm form, unlike the doi/abs
%%% links on this file's 2020 entries -- confirm that it still resolves.
@Article{Yim:2019:TFS,
author = "Keun Soo Yim and Iliyan Malchev and Andrew Hsieh and
Dave Burke",
title = "{Treble}: Fast Software Updates by Creating an
Equilibrium in an Active Software Ecosystem of Globally
Distributed Stakeholders",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "104:1--104:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358237",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358237",
abstract = "This paper presents our experience with Treble, a
two-year initiative to build the modular base in
Android, a Java-based mobile platform running on the
Linux kernel. Our Treble architecture splits the
hardware independent core framework written in Java
from the hardware dependent vendor implementations
(e.g., user space device drivers, vendor native
libraries, and kernel written in C/C++). Cross-layer
communications between them are done via versioned,
stable inter-process communication interfaces whose
backward compatibility is tested by using two API
compliance suites. Based on this architecture, we
repackage the key Android software components that
suffered from crucial post-launch security bugs as
separate images. That not only enables separate
ownerships but also independent updates of each image
by interested ecosystem entities. We discuss our
experience of delivering Treble architectural changes
to silicon vendors and device makers using a yearly
release model. Our experiments and industry rollouts
support our hypothesis that giving more freedom to all
ecosystem entities and creating an equilibrium are a
transformation necessary to further scale the world
largest open source ecosystem with over two billion
active devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "104",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), article 105 (Oct 2019).
%%% The third author is entered given-name-first with the two-word family
%%% name braced, so that BibTeX keeps `Manzanas Lopez' together as the
%%% surname instead of treating `Diego' as the last name.
%%% The final page is still the `??' placeholder; fill it in once the
%%% article's page count is known.
@Article{Tran:2019:SVC,
author = "Hoang-Dung Tran and Feiyang Cai and Diego {Manzanas
Lopez} and Patrick Musau and Taylor T. Johnson and
Xenofon Koutsoukos",
title = "Safety Verification of Cyber-Physical Systems with
Reinforcement Learning Control",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "105:1--105:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358230",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358230",
abstract = "This paper proposes a new forward reachability
analysis approach to verify safety of cyber-physical
systems (CPS) with reinforcement learning controllers.
The foundation of our approach lies on two efficient,
exact and over-approximate reachability algorithms for
neural network control systems using star sets, which
is an efficient representation of polyhedra. Using
these algorithms, we determine the initial conditions
for which a safety-critical system with a neural
network controller is safe by incrementally searching a
critical initial condition where the safety of the
system cannot be established. Our approach produces
tight over-approximation error and it is
computationally efficient, which allows the application
to practical CPS with learning enable components
(LECs). We implement our approach in NNV, a recent
verification tool for neural networks and neural
network control systems, and evaluate its advantages
and applicability by verifying safety of a practical
Advanced Emergency Braking System (AEBS) with a
reinforcement learning (RL) controller trained using
the deep deterministic policy gradient (DDPG) method.
The experimental results show that our new reachability
algorithms are much less conservative than existing
polyhedra-based approaches. We successfully determine
the entire region of the initial conditions of the AEBS
with the RL controller such that the safety of the
system is guaranteed, while a polyhedra-based approach
cannot prove the safety properties of the system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "105",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), article 106 (Oct 2019).
%%% The final page is still the `??' placeholder; fill it in once the
%%% article's page count is known.
%%% NOTE(review): URL uses the ft_gateway.cfm form, unlike the doi/abs
%%% links on this file's 2020 entries -- confirm that it still resolves.
@Article{Huang:2019:RRA,
author = "Chao Huang and Jiameng Fan and Wenchao Li and Xin Chen
and Qi Zhu",
title = "{ReachNN}: Reachability Analysis of Neural-Network
Controlled Systems",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "106:1--106:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358228",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358228",
abstract = "Applying neural networks as controllers in dynamical
systems has shown great promises. However, it is
critical yet challenging to verify the safety of such
control systems with neural-network controllers in the
loop. Previous methods for verifying neural network
controlled systems are limited to a few specific
activation functions. In this work, we propose a new
reachability analysis approach based on Bernstein
polynomials that can verify neural-network controlled
systems with a more general form of activation
functions, i.e., as long as they ensure that the neural
networks are Lipschitz continuous. Specifically, we
consider abstracting feedforward neural networks with
Bernstein polynomials for a small subset of inputs. To
quantify the error introduced by abstraction, we
provide both theoretical error bound estimation based
on the theory of Bernstein polynomials and more
practical sampling based error bound estimation,
following a tight Lipschitz constant estimation
approach based on forward reachability analysis.
Compared with previous methods, our approach addresses
a much broader set of neural networks, including
heterogeneous neural networks that contain multiple
types of activation functions. Experiment results on a
variety of benchmarks show the effectiveness of our
approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "106",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), article 107 (Oct 2019).
%%% The final page is still the `??' placeholder; fill it in once the
%%% article's page count is known.
%%% NOTE(review): URL uses the ft_gateway.cfm form, unlike the doi/abs
%%% links on this file's 2020 entries -- confirm that it still resolves.
@Article{Yaghoubi:2019:WCS,
author = "Shakiba Yaghoubi and Georgios Fainekos",
title = "Worst-case Satisfaction of {STL} Specifications Using
Feedforward Neural Network Controllers: a {Lagrange}
Multipliers Approach",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "107:1--107:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358239",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358239",
abstract = "In this paper, a reinforcement learning approach for
designing feedback neural network controllers for
nonlinear systems is proposed. Given a Signal Temporal
Logic (STL) specification which needs to be satisfied
by the system over a set of initial conditions, the
neural network parameters are tuned in order to
maximize the satisfaction of the STL formula. The
framework is based on a max-min formulation of the
robustness of the STL formula. The maximization is
solved through a Lagrange multipliers method, while the
minimization corresponds to a falsification problem. We
present our results on a vehicle and a quadrotor model
and demonstrate that our approach reduces the training
time more than 50 percent compared to the baseline
approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "107",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), article 108 (Oct 2019).
%%% The final page is still the `??' placeholder; fill it in once the
%%% article's page count is known.
%%% NOTE(review): URL uses the ft_gateway.cfm form, unlike the doi/abs
%%% links on this file's 2020 entries -- confirm that it still resolves.
@Article{Oehlert:2019:CIT,
author = "Dominic Oehlert and Selma Saidi and Heiko Falk",
title = "Code-Inherent Traffic Shaping for Hard Real-Time
Systems",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "108:1--108:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358215",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358215",
abstract = "Modern hard real-time systems evolved from isolated
single-core architectures to complex multi-core
architectures which are often connected in a
distributed manner. With the increasing influence of
interconnections in hard real-time systems, the access
behavior to shared resources of single tasks or cores
becomes a crucial factor for the system's overall
worst-case timing properties. Traffic shaping is a
powerful technique to decrease contention in a network
and deliver guarantees on network streams. In this
paper we present a novel approach to automatically
integrate a traffic shaping behavior into the code of a
program for different traffic shaping profiles while
being as least invasive as possible. As this approach
is solely depending on modifying programs on a
code-level, it does not rely on any additional hardware
or operating system-based functions. We show how
different traffic shaping profiles can be implemented
into programs using a greedy heuristic and an
evolutionary algorithm, as well as their influences on
the modified programs. It is demonstrated that the
presented approaches can be used to decrease worst-case
execution times in multi-core systems and lower buffer
requirements in distributed systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "108",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), article 109 (Oct 2019).
%%% The final page is still the `??' placeholder; fill it in once the
%%% article's page count is known.
%%% NOTE(review): URL uses the ft_gateway.cfm form, unlike the doi/abs
%%% links on this file's 2020 entries -- confirm that it still resolves.
@Article{Awan:2019:TAM,
author = "Muhammad Ali Awan and Konstantinos Bletsas and Pedro
F. Souto and Benny Akesson and Eduardo Tovar",
title = "Techniques and Analysis for Mixed-criticality
Scheduling with Mode-dependent Server Execution
Budgets",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "109:1--109:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358234",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358234",
abstract = "In mixed-criticality systems, tasks of different
criticality share system resources, mainly to reduce
cost. Cost is further reduced by using adaptive
mode-based scheduling arrangements, such as Vestal's
model, to improve resource efficiency, while
guaranteeing schedulability of critical functionality.
To simplify safety certification, servers are often
used to provide temporal isolation between tasks. In
its simplest form, a server is a periodically recurring
time window, in which some tasks are scheduled. A
server's computational requirements may greatly vary in
different modes, although state-of-the-art techniques
and schedulability tests do not allow different budgets
to be used by a server in different modes. This results
in a single conservative execution budget for all
modes, increasing system cost. The goal of this paper
is to reduce the cost of mixed-criticality systems
through three main contributions: (i) a scheduling
arrangement for uniprocessor systems employing
fixed-priority scheduling within periodic servers,
whose budgets are dynamically adjusted at run-time in
the event of a mode change, (ii) a new schedulability
analysis for such systems, and (iii) heuristic
algorithms for assigning budgets to servers in
different modes and ordering the execution of the
servers. Experiments with synthetic task sets
demonstrate considerable improvements (up to 52.8\%) in
scheduling success ratio when using dynamic server
budgets vs. static ``one-size-fits-all-modes''
budgets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "109",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(5s), article 110 (Oct 2019).
%%% The first author's two-word surname is braced so that BibTeX keeps
%%% `Van Pinxten' together as the family name.
%%% The final page is still the `??' placeholder; fill it in once the
%%% article's page count is known.
%%% NOTE(review): URL uses the ft_gateway.cfm form, unlike the doi/abs
%%% links on this file's 2020 entries -- confirm that it still resolves.
@Article{VanPinxten:2019:PSC,
author = "Joost {Van Pinxten} and Marc Geilen and Twan Basten",
title = "Parametric Scheduler Characterization",
journal = j-TECS,
volume = "18",
number = "5s",
pages = "110:1--110:??",
month = oct,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3358226",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Oct 17 18:16:44 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3358226",
abstract = "Schedulers assign starting times to events in a system
such that a set of constraints is met and system
productivity is maximized. We characterize the
scheduler behaviour for the case where decisions are
made by comparing affine expressions of design
parameters such as task workload, processing speed,
robot travelling speed, or a controller's rise and
settling time. Deterministic schedulers can be extended
with symbolic execution, to keep track of the affine
conditions on the parameters for which the scheduling
decisions are made. We introduce a divide-and-conquer
algorithm that uses this information to determine
parameter regions for which the same sequence of
decisions is taken given a particular scenario. The
results provide designers insight in the impact of
parameter changes on the performance of their system.
The exploration can also be executed with the KLEE
symbolic execution engine of the LLVM tool chain to
extract the same results. We show that the
divide-and-conquer approach provides the results much
faster than the generic symbolic execution engine of
KLEE. The results allow visualization of the
sensitivity to all parameter combinations. The results
of our approach therefore provide more insight in the
sensitivity to parameters.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "110",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 112 (Jan 2020): editorial, no abstract.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Shukla:2020:EEC,
author = "Sandeep K. Shukla",
title = "Editorial: Embedded Computing and Society",
journal = j-TECS,
volume = "18",
number = "6",
pages = "112:1--112:3",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3368250",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3368250",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "112",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 113 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Jain:2020:CHS,
author = "Shubham Jain and Anand Raghunathan",
title = "{CxDNN}: Hardware-software Compensation Methods for
Deep Neural Networks on Resistive Crossbar Systems",
journal = j-TECS,
volume = "18",
number = "6",
pages = "113:1--113:23",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362035",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362035",
abstract = "Resistive crossbars have shown strong potential as the
building blocks of future neural fabrics, due to their
ability to natively execute vector-matrix
multiplication (the dominant computational kernel in
DNNs). However, a key challenge that arises in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "113",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 114 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Tiku:2020:OSV,
author = "Saideep Tiku and Sudeep Pasricha",
title = "Overcoming Security Vulnerabilities in Deep
Learning-based Indoor Localization Frameworks on Mobile
Devices",
journal = j-TECS,
volume = "18",
number = "6",
pages = "114:1--114:24",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362036",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362036",
abstract = "Indoor localization is an emerging application domain
for the navigation and tracking of people and assets.
Ubiquitously available Wi-Fi signals have enabled
low-cost fingerprinting-based localization solutions.
Further, the rapid growth in mobile \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "114",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 115 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Tiwari:2020:RRA,
author = "Sakshi Tiwari and Shreshth Tuli and Isaar Ahmad and
Ayushi Agarwal and Preeti Ranjan Panda and Sreenivas
Subramoney",
title = "{REAL}: {REquest} Arbitration in Last Level Caches",
journal = j-TECS,
volume = "18",
number = "6",
pages = "115:1--115:24",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362100",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362100",
abstract = "Shared last level caches (LLC) of multicore
systems-on-chip are subject to a significant amount of
contention over a limited bandwidth, resulting in major
performance bottlenecks that make the issue a
first-order concern in modern multiprocessor
systems-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "115",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 116 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Sood:2020:RDV,
author = "Surinder Sood and Avinash Malik and Partha Roop",
title = "Robust Design and Validation of Cyber-physical
Systems",
journal = j-TECS,
volume = "18",
number = "6",
pages = "116:1--116:21",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362098",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362098",
abstract = "Co-simulation--based validation of hardware
controllers adjoined with plant models, with continuous
dynamics, is an important step in model-based design of
controllers for Cyber-physical Systems (CPS).
Co-simulation suffers from many problems, such as
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "116",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 117 (Jan 2020); bibsource also lists
%%% cryptography2020.bib. The abstract is truncated: it ends in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Zhou:2020:BBT,
author = "Jia Zhou and Prachi Joshi and Haibo Zeng and Renfa
Li",
title = "{BTMonitor}: Bit-time-based Intrusion Detection and
Attacker Identification in Controller Area Network",
journal = j-TECS,
volume = "18",
number = "6",
pages = "117:1--117:23",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362034",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362034",
abstract = "With the rapid growth of connectivity and autonomy for
today's automobiles, their security vulnerabilities are
becoming one of the most urgent concerns in the
automotive industry. The lack of message authentication
in Controller Area Network (CAN), \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "117",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 118 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Li:2020:HSC,
author = "Mengquan Li and Weichen Liu and Nan Guan and Yiyuan
Xie and Yaoyao Ye",
title = "Hardware-Software Collaborative Thermal Sensing in
Optical Network-on-Chip--based Manycore Systems",
journal = j-TECS,
volume = "18",
number = "6",
pages = "118:1--118:24",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362099",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362099",
abstract = "Continuous technology scaling in manycore systems
leads to severe overheating issues. To guarantee system
reliability, it is critical to accurately yet
efficiently monitor runtime temperature distribution
for effective chip thermal management. As an \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "118",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 119 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Neshatpour:2020:IIC,
author = "Katayoun Neshatpour and Houman Homayoun and Avesta
Sasan",
title = "{ICNN}: The Iterative Convolutional Neural Network",
journal = j-TECS,
volume = "18",
number = "6",
pages = "119:1--119:27",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3355553",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3355553",
abstract = "Modern and recent architectures of vision-based
Convolutional Neural Networks (CNN) have improved
detection and prediction accuracy significantly.
However, these algorithms are extremely computationally
intensive. To break the power and performance wall
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "119",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 120 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Cedersjo:2020:TFC,
author = "Gustav Cedersj{\"o} and J{\"o}rn W. Janneck",
title = "{T{\"y}cho}: a Framework for Compiling Stream
Programs",
journal = j-TECS,
volume = "18",
number = "6",
pages = "120:1--120:25",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362692",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362692",
abstract = "Many application areas for embedded systems, such as
DSP, media coding, and image processing, are based on
stream processing. Stream programs in these areas are
often naturally described as graphs, where nodes are
computational kernels that send data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "120",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 121 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Hammadeh:2020:WHR,
author = "Zain A. H. Hammadeh and Sophie Quinton and Rolf
Ernst",
title = "Weakly-hard Real-time Guarantees for Earliest Deadline
First Scheduling of Independent Tasks",
journal = j-TECS,
volume = "18",
number = "6",
pages = "121:1--121:25",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3356865",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3356865",
abstract = "The current trend in modeling and analyzing real-time
systems is toward tighter yet safe timing constraints.
Many practical real-time systems can de facto sustain a
bounded number of deadline-misses, i.e., they have
Weakly-Hard Real-Time (WHRT) \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "121",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 122 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Krishnakumar:2020:APL,
author = "Gnanambikai Krishnakumar and Kommuru Alekhya Reddy and
Chester Rebeiro",
title = "{ALEXIA}: a Processor with Lightweight Extensions for
Memory Safety",
journal = j-TECS,
volume = "18",
number = "6",
pages = "122:1--122:27",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362064",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362064",
abstract = "Illegal use of memory pointers is a serious security
vulnerability. A large number of malwares exploit the
spatial and temporal nature of these vulnerabilities to
subvert execution or glean sensitive data from an
application. Recent countermeasures \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "122",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 18(6), article 123 (Jan 2020). The abstract is truncated: it ends
%%% in an ellipsis.
%%% Pages use the article-number-prefixed form (articleno:first--articleno:last)
%%% because page numbers restart at 1 in every article of this issue.
@Article{Yan:2020:TCH,
author = "Kaige Yan and Jingweijia Tan and Longjun Liu and
Xingyao Zhang and Stanko R. Brankovic and Jinghong Chen
and Xin Fu",
title = "Toward Customized Hybrid Fuel-Cell and Battery-powered
Mobile Device for Individual Users",
journal = j-TECS,
volume = "18",
number = "6",
pages = "123:1--123:20",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362033",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Jan 23 06:51:29 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362033",
abstract = "Rapidly evolving technologies and applications of
mobile devices inevitably increase the power demands on
the battery. However, the development of batteries can
hardly keep pace with the fast-growing demands, leading
to short battery life, which becomes \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "123",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 19(1), article 1 (Feb 2020); pages are in the
%%% articleno:first--articleno:last form.
%%% The abstract is truncated: it ends in an ellipsis.
@Article{Han:2020:BAP,
author = "Jian-Jun Han and Sunlu Gong and Zhenjiang Wang and Wen
Cai and Dakai Zhu and Laurence T. Yang",
title = "Blocking-Aware Partitioned Real-Time Scheduling for
Uniform Heterogeneous Multicore Platforms",
journal = j-TECS,
volume = "19",
number = "1",
pages = "1:1--1:25",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3366683",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 15 07:25:13 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3366683",
abstract = "Heterogeneous multicore processors have recently
become de facto computing engines for state-of-the-art
embedded applications. Nonetheless, very little
research focuses on the scheduling of periodic
(implicit-deadline) real-time tasks upon heterogeneous
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
%%% TECS 19(1), article 2 (Feb 2020); pages are in the
%%% articleno:first--articleno:last form.
%%% The abstract is truncated: it ends in an ellipsis.
%%% NOTE(review): as written, BibTeX takes `Marco' as the first author's
%%% family name; `Sanz Marco' may be a compound surname -- confirm against
%%% the published byline before changing the name or the citation key.
@Article{Marco:2020:ODL,
author = "Vicent Sanz Marco and Ben Taylor and Zheng Wang and
Yehia Elkhatib",
title = "Optimizing Deep Learning Inference on Embedded Systems
Through Adaptive Model Selection",
journal = j-TECS,
volume = "19",
number = "1",
pages = "2:1--2:28",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3371154",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 15 07:25:13 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3371154",
abstract = "Deep neural networks (DNNs) are becoming a key
enabling technique for many application domains.
However, on-device inference on battery-powered,
resource-constrained embedding systems is often
infeasible due to prohibitively long inferencing time
and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Azari:2020:ETO,
  author = {Elham Azari and Sarma Vrudhula},
  title = {{ELSA}: a Throughput-Optimized Design of an {LSTM}
    Accelerator for Energy-Constrained Devices},
  journal = j-TECS,
  volume = 19,
  number = 1,
  pages = {3:1--3:21},
  month = feb,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3366634},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Feb 15 07:25:13 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3366634},
  abstract = {The next significant step in the evolution and
    proliferation of artificial intelligence technology
    will be the integration of neural network (NN) models
    within embedded and mobile systems. This calls for the
    design of compact, energy efficient NN models
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {3},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Jayakodi:2020:DOE,
  author = {Nitthilan Kanappan Jayakodi and Syrine Belakaria and
    Aryan Deshwal and Janardhan Rao Doppa},
  title = {Design and Optimization of Energy-Accuracy Tradeoff
    Networks for Mobile Platforms via Pretrained Deep
    Models},
  journal = j-TECS,
  volume = 19,
  number = 1,
  pages = {4:1--4:24},
  month = feb,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3366636},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Feb 15 07:25:13 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3366636},
  abstract = {Many real-world edge applications including object
    detection, robotics, and smart health are enabled by
    deploying deep neural networks (DNNs) on
    energy-constrained mobile platforms. In this article,
    we propose a novel approach to trade off energy and
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {4},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Romaszkan:2020:PPP,
  author = {Wojciech Romaszkan and Tianmu Li and Puneet Gupta},
  title = {{3PXNet}: Pruned-Permuted-Packed {XNOR} Networks for
    Edge Machine Learning},
  journal = j-TECS,
  volume = 19,
  number = 1,
  pages = {5:1--5:23},
  month = feb,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3371157},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Feb 15 07:25:13 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3371157},
  abstract = {As the adoption of Neural Networks continues to
    proliferate different classes of applications and
    systems, edge devices have been left behind. Their
    strict energy and storage limitations make them unable
    to cope with the sizes of common network models.
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {5},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Lang:2020:DIE,
  author = {Clemens Lang and Isabella Stilkerich},
  title = {Design and Implementation of an Escape Analysis in the
    Context of Safety-Critical Embedded Systems},
  journal = j-TECS,
  volume = 19,
  number = 1,
  pages = {6:1--6:20},
  month = feb,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3372133},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Feb 15 07:25:13 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3372133},
  abstract = {The use of a managed, type-safe language such as
    Standard ML, Ada Ravenscar, or Java in hard real-time
    and embedded systems offers productivity, safety, and
    dependability benefits at a reasonable cost. Static
    software systems, that is systems in which \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {6},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{He:2020:BCL,
  author = {Wenjian He and Sanjeev Das and Wei Zhang and Yang
    Liu},
  title = {{BBB-CFI}: Lightweight {CFI} Approach Against
    Code-Reuse Attacks Using Basic Block Information},
  journal = j-TECS,
  volume = 19,
  number = 1,
  pages = {7:1--7:22},
  month = feb,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3371151},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Feb 15 07:25:13 MST 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3371151},
  abstract = {Code-reuse attack is a concrete threat to computing
    systems because it can evade conventional security
    defenses. Control flow integrity (CFI) is proposed to
    repel this threat. However, former implementations of
    CFI suffer from two major drawbacks: \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {7},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Lizarraga:2020:AMB,
author = "Adrian Lizarraga and Jonathan Sprinkle and Roman
Lysecky",
title = "Automated Model-Based Optimization of Data-Adaptable
Embedded Systems",
journal = j-TECS,
volume = "19",
number = "1",
pages = "8:1--8:22",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372142",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 15 07:25:13 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372142",
abstract = "Dynamic data-driven applications such as object
tracking, surveillance, and other sensing and decision
applications are largely dependent on the
characteristics of the data streams on which they
operate. The underlying models and algorithms of
data-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Ghosh:2020:PGI,
  author = {Sumana Ghosh and Soumyajit Dey and Pallab Dasgupta},
  title = {Pattern Guided Integrated Scheduling and Routing in
    Multi-Hop Control Networks},
  journal = j-TECS,
  volume = 19,
  number = 2,
  pages = {9:1--9:28},
  month = mar,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3372134},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3372134},
  abstract = {Executing a set of control loops over a shared
    multi-hop (wireless) control network (MCN) requires
    careful co-scheduling of the control tasks and the
    routing of sensory/actuation messages over the MCN. In
    this work, we establish pattern guided aperiodic
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {9},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Chen:2020:QEO,
  author = {Fupeng Chen and Heng Yu and Yajun Ha},
  title = {Quality Estimation and Optimization of Adaptive Stereo
    Matching Algorithms for Smart Vehicles},
  journal = j-TECS,
  volume = 19,
  number = 2,
  pages = {10:1--10:24},
  month = mar,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3372784},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3372784},
  abstract = {Stereo matching is a promising approach for smart
    vehicles to find the depth of nearby objects.
    Transforming a traditional stereo matching algorithm to
    its adaptive version has potential advantages to
    achieve the maximum quality (depth accuracy) in a
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {10},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Nejatollahi:2020:SFA,
  author = {Hamid Nejatollahi and Felipe Valencia and Subhadeep
    Banik and Francesco Regazzoni and Rosario Cammarota and
    Nikil Dutt},
  title = {Synthesis of Flexible Accelerators for Early Adoption
    of Ring-{LWE} Post-quantum Cryptography},
  journal = j-TECS,
  volume = 19,
  number = 2,
  pages = {11:1--11:17},
  month = mar,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3378164},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3378164},
  abstract = {The advent of the quantum computer makes current
    public-key infrastructure insecure. Cryptography
    community is addressing this problem by designing,
    efficiently implementing, and evaluating novel
    public-key algorithms capable of withstanding quantum
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {11},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Navarro:2020:MLM,
  author = {Osvaldo Navarro and Jones Yudi and Javier Hoffmann and
    Hector Gerardo Mu{\~n}oz Hernandez and Michael
    H{\"u}bner},
  title = {A Machine Learning Methodology for Cache Memory Design
    Based on Dynamic Instructions},
  journal = j-TECS,
  volume = 19,
  number = 2,
  pages = {12:1--12:20},
  month = mar,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3376920},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3376920},
  abstract = {Cache memories are an essential component of modern
    processors and consume a large percentage of their
    power consumption. Its efficacy depends heavily on the
    memory demands of the software. Thus, finding the
    optimal cache for a particular program is not
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {12},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Kindt:2020:EMB,
  author = {Philipp H. Kindt and Daniel Yunge and Robert Diemer
    and Samarjit Chakraborty},
  title = {Energy Modeling for the {Bluetooth} Low Energy
    Protocol},
  journal = j-TECS,
  volume = 19,
  number = 2,
  pages = {13:1--13:32},
  month = mar,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3379339},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3379339},
  abstract = {Bluetooth Low Energy (BLE) is a wireless protocol
    optimized for low-power communication. To design
    energy-efficient devices, the protocol provides a
    number of parameters that need to be optimized within
    an energy, latency, and throughput design space.
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {13},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Luppold:2020:CWC,
  author = {Arno Luppold and Dominic Oehlert and Heiko Falk},
  title = {Compiling for the Worst Case: Memory Allocation for
    Multi-task and Multi-core Hard Real-time Systems},
  journal = j-TECS,
  volume = 19,
  number = 2,
  pages = {14:1--14:26},
  month = mar,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3381752},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3381752},
  abstract = {Modern embedded hard real-time systems feature
    multiple tasks running on multiple processing cores.
    Schedulability analysis of such systems is usually
    performed on an abstract system level with each task
    being represented as a black box with fixed \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {14},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Ahmad:2020:FFB,
  author = {Afzal Ahmad and Muhammad Adeel Pasha},
  title = {{FFConv}: an {FPGA}-based Accelerator for Fast
    Convolution Layers in Convolutional Neural Networks},
  journal = j-TECS,
  volume = 19,
  number = 2,
  pages = {15:1--15:24},
  month = mar,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3380548},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3380548},
  abstract = {Image classification is known to be one of the most
    challenging problems in the domain of computer vision.
    Significant research is being done on developing
    systems and algorithms improving accuracy, performance,
    area, and power consumption for related \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {15},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Shukla:2020:TER,
  author = {Sandeep K. Shukla},
  title = {{TECS} Editorial: Rethinking and Re-evaluating in the
    Time of Crisis},
  journal = j-TECS,
  volume = 19,
  number = 3,
  pages = {16e:1--16e:3},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3395923},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3395923},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {16e},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Ganapathy:2020:DDV,
  author = {Sanjay Ganapathy and Swagath Venkataramani and
    Giridhur Sriraman and Balaraman Ravindran and Anand
    Raghunathan},
  title = {{DyVEDeep}: Dynamic Variable Effort Deep Neural
    Networks},
  journal = j-TECS,
  volume = 19,
  number = 3,
  pages = {16:1--16:24},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3372882},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3372882},
  abstract = {Deep Neural Networks (DNNs) have advanced the
    state-of-the-art in a variety of machine learning tasks
    and are deployed in increasing numbers of products and
    services. However, the computational requirements of
    training and evaluating large-scale DNNs \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {16},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Smeets:2020:ARS,
  author = {Hugues Smeets and Matteo Ceriotti and Pedro Jos{\'e}
    Marr{\'o}n},
  title = {Adapting Recursive Sinusoidal Software Oscillators for
    Low-power Fixed-point Processors},
  journal = j-TECS,
  volume = 19,
  number = 3,
  pages = {17:1--17:26},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3378559},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3378559},
  abstract = {The growing field of the Internet of Things relies at
    the bottom on components with very scarce computing
    resources that currently do not allow complex
    processing of sensed data. Any computation involving
    Fast Fourier Transforms (FFT), Wavelet \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {17},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Cheng:2020:DDT,
  author = {Yuan Cheng and Guangya Li and Ngai Wong and Hai-Bao
    Chen and Hao Yu},
  title = {{DEEPEYE}: a Deeply Tensor-Compressed Neural Network
    for Video Comprehension on Terminal Devices},
  journal = j-TECS,
  volume = 19,
  number = 3,
  pages = {18:1--18:25},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3381805},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3381805},
  abstract = {Video object detection and action recognition
    typically require deep neural networks (DNNs) with huge
    number of parameters. It is thereby challenging to
    develop a DNN video comprehension unit in
    resource-constrained terminal devices. In this article,
    we \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {18},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Aerabi:2020:DSE,
  author = {Ehsan Aerabi and Milad Bohlouli and Mohammad Hasan
    Ahmadi Livany and Mahdi Fazeli and Athanasios
    Papadimitriou and David Hely},
  title = {Design Space Exploration for Ultra-Low-Energy and
    Secure {IoT MCUs}},
  journal = j-TECS,
  volume = 19,
  number = 3,
  pages = {19:1--19:34},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3384446},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3384446},
  abstract = {This article explores the design space of secure
    communication in ultra-low-energy IoT devices based on
    Micro-Controller Units (MCUs). It tries to identify,
    benchmark, and compare security-related design choices
    in a Commercial-Off-The-Shelf (COTS) \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {19},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Seo:2020:MMP,
  author = {Hwajeong Seo and Kyuhwang An and Hyeokdong Kwon and
    Zhi Hu},
  title = {{Montgomery} Multiplication for Public Key
    Cryptography on {MSP430X}},
  journal = j-TECS,
  volume = 19,
  number = 3,
  pages = {20:1--20:15},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3387919},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3387919},
  abstract = {For traditional public key cryptography and
    post-quantum cryptography, such as elliptic curve
    cryptography and supersingular isogeny key
    encapsulation, modular multiplication is the most
    performance-critical operation among basic arithmetic
    of these cryptographic schemes. For this reason, the
    execution timing of such cryptographic schemes, which
    may highly determine the service availability for
    low-end microprocessors (e.g., 8-bit AVR, 16-bit
    MSP430X, and 32-bit ARM Cortex-M), mainly relies on the
    efficiency of modular multiplication on target embedded
    processors.
    In this article, we present new optimal modular
    multiplication techniques based on the interleaved
    Montgomery multiplication on 16-bit MSP430X
    microprocessors, where the multiplication part is
    performed in a hardware multiplier and the reduction
    part is performed in a basic arithmetic logic unit
    (ALU) with the optimal modular multiplication routine,
    respectively. This two-step approach is effective for
    the special modulus of NIST curves, SM2 curves, and
    supersingular isogeny key encapsulation. We further
    optimized the Montgomery reduction by using techniques
    for Montgomery-friendly prime. This technique
    significantly reduces the number of partial products.
    To demonstrate the superiority of the proposed
    implementation of Montgomery multiplication, we applied
    the proposed method to the NIST P-256 curve, of which
    the implementation improves the previous modular
    multiplication operation by 23.6\% on 16-bit MSP430X
    microprocessors and to the SM2 curve as well (first
    implementation on 16-bit MSP430X
    microcontrollers).
    Moreover, secure countermeasures against timing attack
    and simple power analysis are also applied to the
    scalar multiplication of NIST P-256 and SM2 curves,
    which achieve the 8,582,338 clock cycles (0.53 seconds
    at 16 MHz) and 10,027,086 clock cycles (0.62 seconds at
    16 MHz), respectively. The proposed Montgomery
    multiplication is a generic method that can be applied
    to other cryptographic schemes and microprocessors with
    minor modifications.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {20},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ghosh:2020:RSD,
author = "Saurav Kumar Ghosh and {Jaffer Sheriff R. C.} and Vibhor
Jain and Soumyajit Dey",
title = "Reliable and Secure Design-Space-Exploration for
Cyber-Physical Systems",
journal = j-TECS,
volume = "19",
number = "3",
pages = "21:1--21:29",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3387927",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Jul 8 17:07:32 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3387927",
abstract = "Given the widespread deployment of cyber-physical
systems and their safety-critical nature, reliability
and security guarantees offered by such systems are of
paramount importance. While the security of such
systems against sensor attacks have garnered \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Zhao:2020:NLD,
  author = {Zhuoran Zhao and Kamyar Mirzazad Barijough and Andreas
    Gerstlauer},
  title = {Network-level Design Space Exploration of
    Resource-constrained Networks-of-Systems},
  journal = j-TECS,
  volume = 19,
  number = 4,
  pages = {22:1--22:26},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3387918},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3387918},
  abstract = {Driven by recent advances in networking and computing
    technologies, distributed application scenarios are
    increasingly deployed on resource-constrained
    processing platforms. This includes networked embedded
    and cyber-physical systems as well as edge \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {22},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Kadiyala:2020:LLA,
  author = {Sai Praveen Kadiyala and Manaar Alam and Yash
    Shrivastava and Sikhar Patranabis and Muhamed Fauzi Bin
    Abbas and Arnab Kumar Biswas and Debdeep Mukhopadhyay
    and Thambipillai Srikanthan},
  title = {{LAMBDA: Lightweight Assessment of Malware for
    emBeddeD} Architectures},
  journal = j-TECS,
  volume = 19,
  number = 4,
  pages = {23:1--23:31},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3390855},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3390855},
  abstract = {Security is a critical aspect in many of the latest
    embedded and IoT systems. Malware is one of the severe
    threats of security for such devices. There have been
    enormous efforts in malware detection and analysis;
    however, occurrences of newer varieties \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {23},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Das:2020:ALS,
  author = {Tuhin Subhra Das and Prasun Ghosal and Navonil
    Chatterjee and Arnab Nath and Akash Banerjee and
    Subhojyoti Khastagir},
  title = {Application of Logical Sub-networking in
    Congestion-aware Deadlock-free {SDmesh} Routing},
  journal = j-TECS,
  volume = 19,
  number = 4,
  pages = {24:1--24:26},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3387928},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3387928},
  abstract = {An adaptive routing helps in evading early network
    saturation by steering data packets through the less
    congested area at the oppressive loaded situation.
    However, performances of adaptive routing are not
    always promising under all circumstances. Say
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {24},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@article{Chuang:2020:DDB,
  author = {Yi-Jing Chuang and Shuo-Han Chen and Yuan-Hao Chang
    and Yu-Pei Liang and Hsin-Wen Wei and Wei-Kuan Shih},
  title = {{DSTL}: a Demand-Based Shingled Translation Layer for
    Enabling Adaptive Address Mapping on {SMR} Drives},
  journal = j-TECS,
  volume = 19,
  number = 4,
  pages = {25:1--25:21},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3391892},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3391892},
  abstract = {Shingled magnetic recording (SMR) is regarded as a
    promising technology for resolving the areal density
    limitation of conventional magnetic recording hard disk
    drives. Among different types of SMR drives,
    drive-managed SMR (DM-SMR) requires no changes
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {25},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Behrouzian:2020:FAR,
author = "Amir Behrouzian and Hadi Alizadeh Ara and Marc Geilen
and Dip Goswami and Twan Basten",
title = "Firmness Analysis of Real-time Tasks",
journal = j-TECS,
volume = "19",
number = "4",
pages = "26:1--26:24",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3398328",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jul 19 08:50:15 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3398328",
abstract = "$ (m, k) $-firm real-time tasks require meeting the
deadline of at least $m$ jobs out of any $k$ consecutive
jobs. When compared to hard real-time tasks,
$ (m, k) $-firm tasks open up the possibility of tighter
resource-dimensioning in implementations. Firmness
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liu:2020:AML,
author = "Ke Liu and Mengying Zhao and Lei Ju and Zhiping Jia
and Jingtong Hu and Chun Jason Xue",
title = "Applying Multiple Level Cell to Non-volatile {FPGAs}",
journal = j-TECS,
volume = "19",
number = "4",
pages = "27:1--27:22",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3400885",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jul 19 08:50:15 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3400885",
abstract = "Static random access memory- (SRAM) based field
programmable gate arrays (FPGAs) are currently facing
challenges of limited capacity and high leakage power.
To solve this problem, non-volatile memory (NVM) is
proposed as the alternative to build non-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Sheikh:2020:EER,
  author = {Saad Zia Sheikh and Muhammad Adeel Pasha},
  title = {Energy-efficient Real-time Scheduling on Multicores: a
    Novel Approach to Model Cache Contention},
  journal = j-TECS,
  volume = 19,
  number = 4,
  pages = {28:1--28:25},
  month = jul,
  year = 2020,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3399413},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/abs/10.1145/3399413},
  abstract = {With the increasing demand for higher performance, the
    adoption of multicores has been a major stepping stone
    in the evolution of hard real-time systems. Though the
    computational bandwidth is increased due to parallel
    processing, the indispensable \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {28},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Hu:2020:GBT,
author = "Junyan Hu and Kenli Li and Chubo Liu and Keqin Li",
title = "Game-Based Task Offloading of Multiple Mobile Devices
with {QoS} in Mobile Edge Computing Systems of Limited
Computation Capacity",
journal = j-TECS,
volume = "19",
number = "4",
pages = "29:1--29:21",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3398038",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jul 19 08:50:15 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3398038",
abstract = "Mobile edge computing (MEC) is becoming a promising
paradigm of providing computing servers, like cloud
computing, to Edge node. Compared to cloud servers,
MECs are deployed closer to mobile devices (MDs) and
can provide high quality-of-service (QoS) \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "29",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Saha:2021:MWR,
  author = {Debasri Saha and Susmita Sur-Kolay},
  title = {Minimization of {WCRT} with Recovery Assurance from
    Hardware {Trojans} for Tasks on {FPGA}-based Cloud},
  journal = j-TECS,
  volume = 20,
  number = 1,
  pages = {1:1--1:25},
  month = jan,
  year = 2021,
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3409479},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Jan 16 06:52:20 MST 2021},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3409479},
  abstract = {Dynamic partial reconfiguration (DPR) enabled
    FPGA-based Cloud architecture acts as a flexible and
    efficient shared environment to facilitates application
    support to users' request at low cost. While on one
    hand we need to handle a variety of tasks, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {1},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Srinivasavarma:2021:TBC,
author = "Vegesna S. M. Srinivasavarma and Shiv Vidhyut and Noor
Mahammad S.",
title = "A {TCAM}-based Caching Architecture Framework for
Packet Classification",
journal = j-TECS,
volume = "20",
number = "1",
pages = "2:1--2:19",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3409109",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Jan 16 06:52:20 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3409109",
abstract = "Packet Classification is the enabling function for
performing many networking applications like Integrated
Services, Differentiated Services, Access
Control/Firewalls, and Intrusion Detection. To cope
with high-speed links and ever-increasing bandwidth
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pazzaglia:2021:GWH,
author = "Paolo Pazzaglia and Youcheng Sun and Marco {Di
Natale}",
title = "Generalized Weakly Hard Schedulability Analysis for
Real-Time Periodic Tasks",
journal = j-TECS,
volume = "20",
number = "1",
pages = "3:1--3:26",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3404888",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Jan 16 06:52:20 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3404888",
abstract = "The weakly hard real-time model is an abstraction for
applications, including control systems, that can
tolerate occasional deadline misses, but can also be
compromised if a sufficiently high number of late
terminations occur in a given time window. The
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Paul:2021:ATA,
author = "Suraj Paul and Navonil Chatterjee and Prasun Ghosal
and Jean-Philippe Diguet",
title = "Adaptive Task Allocation and Scheduling on {NoC}-based
Multicore Platforms with Multitasking Processors",
journal = j-TECS,
volume = "20",
number = "1",
pages = "4:1--4:26",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3408324",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Jan 16 06:52:20 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3408324",
abstract = "The application workloads in modern multicore
platforms are becoming increasingly dynamic. It becomes
challenging when multiple applications need to be
executed in parallel in such systems. Mapping and
scheduling of these applications are critical for
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Agarwal:2021:IPH,
author = "Sukarn Agarwal and Hemangee K. Kapoor",
title = "Improving the Performance of Hybrid Caches Using
Partitioned Victim Caching",
journal = j-TECS,
volume = "20",
number = "1",
pages = "5:1--5:27",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3411368",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Jan 16 06:52:20 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3411368",
abstract = "Non-Volatile Memory technologies are coming as a
viable option on account of the high density and
low-leakage power over the conventional SRAM
counterpart. However, the increased write latency
reduces their chances as a substitute for SRAM. To
attenuate \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{He:2021:GCF,
author = "Jiaji He and Haocheng Ma and Yanjiang Liu and Yiqiang
Zhao",
title = "Golden Chip-Free {Trojan} Detection Leveraging {Trojan
Trigger}'s Side-Channel Fingerprinting",
journal = j-TECS,
volume = "20",
number = "1",
pages = "6:1--6:18",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3419105",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Jan 16 06:52:20 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3419105",
abstract = "Hardware Trojans (HTs) have become a major threat for
the integrated circuit industry and supply chain and
have motivated numerous developments of HT detection
schemes. Although the side-channel HT detection
approach is among the most promising \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ivanov:2021:VSA,
author = "Radoslav Ivanov and Taylor J. Carpenter and James
Weimer and Rajeev Alur and George J. Pappas and Insup
Lee",
title = "Verifying the Safety of Autonomous Systems with Neural
Network Controllers",
journal = j-TECS,
volume = "20",
number = "1",
pages = "7:1--7:26",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3419742",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Jan 16 06:52:20 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3419742",
abstract = "This article addresses the problem of verifying the
safety of autonomous systems with neural network (NN)
controllers. We focus on NNs with sigmoid/tanh
activations and use the fact that the sigmoid/tanh is
the solution to a quadratic differential \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ibrahim:2021:MFU,
author = "Omar Adel Ibrahim and Savio Sciancalepore and Gabriele
Oligeri and Roberto {Di Pietro}",
title = "{MAGNETO}: Fingerprinting {USB} Flash Drives via
Unintentional Magnetic Emissions",
journal = j-TECS,
volume = "20",
number = "1",
pages = "8:1--8:26",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3422308",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Jan 16 06:52:20 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3422308",
abstract = "Universal Serial Bus (USB) Flash Drives are nowadays
one of the most convenient and diffused means to
transfer files, especially when no Internet connection
is available. However, USB flash drives are also one of
the most common attack vectors used to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fard:2021:APP,
author = "Mahdi Mohammadpour Fard and Mahmood Hasanloo and Mehdi
Kargahi",
title = "Analytical Program Power Characterization for Battery
Depletion-time Estimation",
journal = j-TECS,
volume = "20",
number = "2",
pages = "9:1--9:9",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3421511",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3421511",
abstract = "Appropriate battery selection is a major design
decision regarding the fast growth of battery-operated
devices like space rovers, wireless sensor network
nodes, rescue robots, and so on. Many such systems are
mission critical, where estimation of the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ungureanu:2021:FAT,
author = "George Ungureanu and Jos{\'e} Edil {Guimar{\~a}es De
Medeiros} and Timmy Sundstr{\"o}m and Ingemar
S{\"o}derquist and Anders {\AA}hlander and Ingo
Sander",
title = "{ForSyDe-Atom}: Taming Complexity in Cyber Physical
System Design with Layers",
journal = j-TECS,
volume = "20",
number = "2",
pages = "10:1--10:27",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3424667",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3424667",
abstract = "We present ForSyDe-Atom, a formal framework intended
as an entry point for disciplined design of complex
cyber-physical systems. This framework provides a set
of rules for combining several domain-specific
languages as structured, enclosing layers to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "10",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2021:HCO,
author = "Keqin Li",
title = "Heuristic Computation Offloading Algorithms for Mobile
Users in Fog Computing",
journal = j-TECS,
volume = "20",
number = "2",
pages = "11:1--11:28",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3426852",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3426852",
abstract = "The investigation in this article makes the following
important contributions to combinatorial optimization
of computation offloading in fog computing. First, we
rigorously define the two problems of optimal
computation offloading with energy constraint
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dharmaraj:2021:OSP,
author = "Celia Dharmaraj and Vinita Vasudevan and Nitin
Chandrachoodan",
title = "Optimization of Signal Processing Applications Using
Parameterized Error Models for Approximate Adders",
journal = j-TECS,
volume = "20",
number = "2",
pages = "12:1--12:25",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3430509",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3430509",
abstract = "Approximate circuit design has gained significance in
recent years targeting error-tolerant applications. In
the literature, there have been several attempts at
optimizing the number of approximate bits of each
approximate adder in a system for a given \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Atoofian:2021:REG,
author = "Ehsan Atoofian and Zayan Shaikh and Ali Jannesari",
title = "Reducing Energy in {GPGPUs} through Approximate
Trivial Bypassing",
journal = j-TECS,
volume = "20",
number = "2",
pages = "13:1--13:27",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3429440",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3429440",
abstract = "General-purpose computing using graphics processing
units (GPGPUs) is an attractive option for acceleration
of applications with massively data-parallel tasks.
While performance of modern GPGPUs is increasing
rapidly, the power consumption of these \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Carreon:2021:PET,
author = "Nadir A. Carreon and Sixing Lu and Roman Lysecky",
title = "Probabilistic Estimation of Threat Intrusion in
Embedded Systems for Runtime Detection",
journal = j-TECS,
volume = "20",
number = "2",
pages = "14:1--14:27",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3432590",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3432590",
abstract = "With billions of networked connected embedded systems,
the security historically provided by the isolation of
embedded systems is no longer sufficient. Millions of
new malware are created every month and zero-day
attacks are becoming an increasing \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Akbari:2021:FHA,
author = "Ali Akbari and Jonathan Martinez and Roozbeh Jafari",
title = "Facilitating Human Activity Data Annotation via
Context-Aware Change Detection on Smartwatches",
journal = j-TECS,
volume = "20",
number = "2",
pages = "15:1--15:20",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3431503",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3431503",
abstract = "Annotating activities of daily living (ADL) is vital
for developing machine learning models for activity
recognition. In addition, it is critical for
self-reporting purposes such as in assisted living
where the users are asked to log their ADLs. However,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ko:2021:LCL,
author = "Yousun Ko and Alex Chadwick and Daniel Bates and
Robert Mullins",
title = "Lane Compression: a Lightweight Lossless Compression
Method for Machine Learning on Embedded Systems",
journal = j-TECS,
volume = "20",
number = "2",
pages = "16:1--16:26",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3431815",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3431815",
abstract = "This article presents Lane Compression, a lightweight
lossless compression technique for machine learning
that is based on a detailed study of the statistical
properties of machine learning data. The proposed
technique profiles machine learning data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sepulveda:2021:BCA,
author = "Johanna Sep{\'u}lveda and Mathieu Gross and Andreas
Zankl and Georg Sigl",
title = "Beyond Cache Attacks: Exploiting the Bus-based
Communication Structure for Powerful On-Chip
Microarchitectural Attacks",
journal = j-TECS,
volume = "20",
number = "2",
pages = "17:1--17:23",
month = mar,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3433653",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 20 17:37:34 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3433653",
abstract = "System-on-Chips (SoCs) are a key enabling technology
for the Internet-of-Things (IoT), a hyper-connected
world where on- and inter-chip communication is
ubiquitous. SoCs usually integrate cryptographic
hardware cores for confidentiality and authentication
services. However, these components are prone to
implementation attacks. During the operation of a
cryptographic core, the secret key may passively be
inferred through cache observations. Access-driven
attacks exploiting these observations are therefore a
vital threat to SoCs operating in IoT environments.
Previous works have shown the feasibility of these
attacks in the SoC context. Yet, the SoC communication
structure can be used to further improve access-based
cache attacks. The communication attacks are not as
well-understood as other micro-architectural attacks.
It is important to raise the awareness of SoC designers
of such a threat. To this end, we present four
contributions. First, we demonstrate an improved
Prime+Probe attack on four different AES-128
implementations (original transformation tables,
T0-Only, T2KB, and S-Box). As a novelty, this attack
exploits the collisions of the bus-based SoC
communication to further increase its efficiency.
Second, we explore the impact of preloading on the
efficiency of our communication-optimized attack.
Third, we integrate three countermeasures (shuffling,
mini-tables, and Time-Division Multiple Access (TDMA)
bus arbitration) and evaluate their impact on the
attack. Although shuffling and mini-tables
countermeasures were proposed in previous work, their
application as countermeasures against the bus-based
attack was not studied before. In addition, TDMA as a
countermeasure for bus-based attacks is an original
contribution of this work. Fourth, we further discuss
the implications of our work in the SoC design and its
perspective with the new cryptographic primitives
proposed in the ongoing National Institute of Standards
and Technology Lightweight Cryptography competition.
The results show that our improved
communication-optimized attack is efficient, speeding
up full key recovery by up to 400 times when compared
to the traditional Prime+Probe technique. Moreover, the
protection techniques are feasible and effectively
mitigate the proposed improved attack.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mitra:2021:ERA,
author = "Tulika Mitra",
title = "Editorial: Reimagining {{\booktitle{ACM Transactions
on Embedded Computing Systems (TECS)}}}",
journal = j-TECS,
volume = "20",
number = "3",
pages = "18e:1--18e:3",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450438",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3450438",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18e",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Langerman:2021:RTH,
author = "David Langerman and Alan George",
title = "Real-time, High-resolution Depth Upsampling on
Embedded Accelerators",
journal = j-TECS,
volume = "20",
number = "3",
pages = "18:1--18:22",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3436878",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3436878",
abstract = "High-resolution, low-latency apps in computer vision
are ubiquitous in today's world of mixed-reality
devices. These innovations provide a platform that can
leverage the improving technology of depth sensors and
embedded accelerators to enable higher-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Seo:2021:SBA,
author = "Hwajeong Seo and Pakize Sanal and Reza Azarderakhsh",
title = "{SIKE} in 32-bit {ARM} Processors Based on Redundant
Number System for {NIST} Level-{II}",
journal = j-TECS,
volume = "20",
number = "3",
pages = "19:1--19:23",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3439733",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3439733",
abstract = "We present an optimized implementation of the
post-quantum Supersingular Isogeny Key Encapsulation
(SIKE) for 32-bit ARMv7-A processors supporting NEON
engine (i.e., SIMD instruction). Unlike previous SIKE
implementations, finite field arithmetic is efficiently
implemented in a redundant representation, which avoids
carry propagation and pipeline stall. Furthermore, we
adopted several state-of-the-art engineering techniques
as well as hand-crafted assembly implementation for
high performance. Optimized implementations are ported
to Microsoft SIKE library written in ``a non-redundant
representation'' and evaluated in high-end 32-bit
ARMv7-A processors, such as ARM Cortex-A5, A7, and A15.
A full key-exchange execution of SIKEp503 is performed
in about 109 million cycles on ARM Cortex-A15
processors (i.e., 54.5 ms @2.0 GHz), which is about $
1.58 \times $ faster than previous state-of-the-art
work presented in CHES 18.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ma:2021:CSA,
author = "Mingze Ma and Rizos Sakellariou",
title = "Code-size-aware Scheduling of Synchronous Dataflow
Graphs on Multicore Systems",
journal = j-TECS,
volume = "20",
number = "3",
pages = "20:1--20:24",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3440034",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3440034",
abstract = "Synchronous dataflow graphs are widely used to model
digital signal processing and multimedia applications.
Self-timed execution is an efficient methodology for
the analysis and scheduling of synchronous dataflow
graphs. In this article, we propose a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yuan:2021:CCB,
author = "Bo Yuan and Xiaofen Lu and Ke Tang and Xin Yao",
title = "Cooperative Coevolution-based Design Space Exploration
for Multi-mode Dataflow Mapping",
journal = j-TECS,
volume = "20",
number = "3",
pages = "21:1--21:25",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3440246",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3440246",
abstract = "Some signal processing and multimedia applications can
be specified by synchronous dataflow (SDF) models. The
problem of SDF mapping to a given set of heterogeneous
processors has been known to be NP-hard and widely
studied in the design automation \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Leon:2021:IPP,
author = "Vasileios Leon and George Lentaris and Evangelos
Petrongonas and Dimitrios Soudris and Gianluca Furano
and Antonis Tavoularis and David Moloney",
title = "Improving Performance-Power-Programmability in Space
Avionics with Edge Devices: {VBN} on {Myriad2} {SoC}",
journal = j-TECS,
volume = "20",
number = "3",
pages = "22:1--22:23",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3440885",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3440885",
abstract = "The advent of powerful edge devices and AI algorithms
has already revolutionized many terrestrial
applications; however, for both technical and
historical reasons, the space industry is still
striving to adopt these key enabling technologies in
new \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shamsa:2021:UUB,
author = "Elham Shamsa and Alma Pr{\"o}bstl and Nima TaheriNejad
and Anil Kanduri and Samarjit Chakraborty and Amir M.
Rahmani and Pasi Liljeberg",
title = "{UBAR}: User- and Battery-aware Resource Management
for Smartphones",
journal = j-TECS,
volume = "20",
number = "3",
pages = "23:1--23:25",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441644",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3441644",
abstract = "Smartphone users require high Battery Cycle Life (BCL)
and high Quality of Experience (QoE) during their
usage. These two objectives can be conflicting based on
the user preference at run-time. Finding the best
trade-off between QoE and BCL requires an \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rottleuthner:2021:SYP,
author = "Michel Rottleuthner and Thomas C. Schmidt and Matthias
W{\"a}hlisch",
title = "Sense Your Power: The {ECO} Approach to Energy
Awareness for {IoT} Devices",
journal = j-TECS,
volume = "20",
number = "3",
pages = "24:1--24:25",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3441643",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3441643",
abstract = "Energy-constrained sensor nodes can adaptively
optimize their energy consumption if a continuous
measurement is provided. This is of particular
importance in scenarios of high dynamics such as with
energy harvesting. Still, self-measuring of power
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Marshall:2021:PCP,
author = "James Marshall and Robert Gifford and Gedare Bloom and
Gabriel Parmer and Rahul Simha",
title = "Precise Cache Profiling for Studying Radiation
Effects",
journal = j-TECS,
volume = "20",
number = "3",
pages = "25:1--25:25",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3442339",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3442339",
abstract = "Increased access to space has led to an increase in
the usage of commodity processors in radiation
environments. These processors are vulnerable to
transient faults such as single event upsets that may
cause bit-flips in processor components. Caches in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Poudel:2021:MFU,
author = "Prawar Poudel and Biswajit Ray and Aleksandar
Milenkovic",
title = "Microcontroller Fingerprinting Using Partially Erased
{NOR} Flash Memory Cells",
journal = j-TECS,
volume = "20",
number = "3",
pages = "26:1--26:23",
month = apr,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3448271",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Apr 24 07:51:05 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3448271",
abstract = "Electronic device fingerprints, unique bit vectors
extracted from device's physical properties, are used
to differentiate between instances of functionally
identical devices. This article introduces a new
technique that extracts fingerprints from unique
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Girault:2021:ISI,
author = "Alain Girault and Reinhard {Von Hanxleden}",
title = "Introduction to the Special Issue on {Specification
and Design Languages (FDL 2019)}",
journal = j-TECS,
volume = "20",
number = "4",
pages = "27:1--27:3",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3458748",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3458748",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shi:2021:TGH,
author = "Zhendong Shi and Haocheng Ma and Qizhi Zhang and
Yanjiang Liu and Yiqiang Zhao and Jiaji He",
title = "Test Generation for Hardware {Trojan} Detection Using
Correlation Analysis and Genetic Algorithm",
journal = j-TECS,
volume = "20",
number = "4",
pages = "28:1--28:20",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3446837",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3446837",
abstract = "Hardware Trojan (HT) is a major threat to the security
of integrated circuits (ICs). Among various HT
detection approaches, side channel analysis (SCA)-based
methods have been extensively studied. SCA-based
methods try to detect HTs by comparing side \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jackson:2021:EES,
author = "Riley Jackson and Jonathan Gresl and Ramon Lawrence",
title = "Efficient External Sorting for Memory-Constrained
Embedded Devices with Flash Memory",
journal = j-TECS,
volume = "20",
number = "4",
pages = "29:1--29:21",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3446976",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3446976",
abstract = "Embedded devices are ubiquitous in areas of industrial
and environmental monitoring, health and safety, and
consumer appliances. A common use case is data
collection, processing, and performing actions based on
data analysis. Although many Internet of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "29",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rahman:2021:LTW,
author = "Mahbubur Rahman and Dali Ismail and Venkata P.
Modekurthy and Abusayeed Saifullah",
title = "{LPWAN} in the {TV} White Spaces: a Practical
Implementation and Deployment Experiences",
journal = j-TECS,
volume = "20",
number = "4",
pages = "30:1--30:26",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447877",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3447877",
abstract = "Low-Power Wide-Area Network (LPWAN) is an enabling
Internet-of-Things technology that supports long-range,
low-power, and low-cost connectivity to numerous
devices. To avoid the crowd in the limited ISM band
(where most LPWANs operate) and cost of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bombieri:2021:SIS,
author = "Nicola Bombieri and Silvia Scaffeo and Antonio
Mastrandrea and Simone Caligola and Tommaso Carlucci
and Franco Fummi and Carlo Laudanna and Gabriela
Constantin and Rosalba Giugno",
title = "{SystemC} Implementation of Stochastic {Petri} Nets
for Simulation and Parameterization of Biological
Networks",
journal = j-TECS,
volume = "20",
number = "4",
pages = "31:1--31:20",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3427091",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3427091",
abstract = "Model development and simulation of biological
networks is recognized as a key task in Systems
Biology. Integrated with in vitro and in vivo
experimental data, network simulation allows for the
discovery of the dynamics that regulate biological
systems. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "31",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gressl:2021:DSE,
author = "Lukas Gressl and Christian Steger and Ulrich Neffe",
title = "Design Space Exploration for Secure {IoT} Devices and
Cyber-Physical Systems",
journal = j-TECS,
volume = "20",
number = "4",
pages = "32:1--32:24",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3430372",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3430372",
abstract = "With the advent of the Internet of Things (IoT) and
Cyber-Physical Systems (CPS), embedded devices have
been gaining importance in our daily lives, as well as
industrial processes. Independent of their usage, be it
within an IoT system or a CPS, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "32",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bruns:2021:TMC,
author = "Friederike Bruns and Irune Yarza and Philipp
Ittershagen and Kim Gr{\"u}ttner",
title = "Time Measurement and Control Blocks for Bare-Metal
{C++} Applications",
journal = j-TECS,
volume = "20",
number = "4",
pages = "34:1--34:26",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434401",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3434401",
abstract = "Precisely timed execution of resource constrained
bare-metal applications is difficult, because the
embedded software developer usually has to implement
and check the timeliness of the executed application
through manual interaction with timers or \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dupont:2021:EBH,
author = "Guillaume Dupont and Yamine Ait-Ameur and Neeraj Kumar
Singh and Marc Pantel",
title = "{Event-B} Hybridation: a Proof and Refinement-based
Framework for Modelling Hybrid Systems",
journal = j-TECS,
volume = "20",
number = "4",
pages = "35:1--35:37",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3448270",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3448270",
abstract = "Hybrid systems are complex systems where a software
controller interacts with a physical environment,
usually named a plant, through sensors and actuators.
The specification and design of such systems usually
rely on the description of both continuous \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "35",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schulz-Rosengarten:2021:TOO,
author = "Alexander Schulz-Rosengarten and Steven Smyth and
Michael Mendler",
title = "Toward Object-oriented Modeling in {SCCharts}",
journal = j-TECS,
volume = "20",
number = "4",
pages = "37:1--37:26",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453482",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3453482",
abstract = "Object orientation is a powerful and widely used
paradigm for abstraction and structuring in
programming. Many languages are designed with this
principle or support different degrees of object
orientation. In synchronous languages, originally
developed \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Broman:2021:IPM,
author = "David Broman",
title = "Interactive Programmatic Modeling",
journal = j-TECS,
volume = "20",
number = "4",
pages = "33:1--33:26",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3431387",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3431387",
abstract = "Modeling and computational analyses are fundamental
activities within science and engineering. Analysis
activities can take various forms, such as simulation
of executable models, formal verification of model
properties, or inference of hidden model \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lohstroh:2021:TLF,
author = "Marten Lohstroh and Christian Menard and Soroush
Bateni and Edward A. Lee",
title = "Toward a Lingua Franca for Deterministic Concurrent
Systems",
journal = j-TECS,
volume = "20",
number = "4",
pages = "36:1--36:27",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3448128",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sun Jun 6 07:03:40 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3448128",
abstract = "Many programming languages and programming frameworks
focus on parallel and distributed computing. Several
frameworks are based on actors, which provide a more
disciplined model for concurrency than threads. The
interactions between actors, however, if \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shrivastava:2020:ISIa,
author = "Aviral Shrivastava and Jian-Jia Chen and Youtao
Zhang",
title = "Introduction to the Special Issue on Languages,
Compilers, Tools, and Theory of Embedded Systems: {Part
1}",
journal = j-TECS,
volume = "19",
number = "5",
pages = "30:1--30:3",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3417732",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3417732",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2020:DAR,
author = "Wanli Chang and Ran Wei and Shuai Zhao and Andy
Wellings and Jim Woodcock and Alan Burns",
title = "Development Automation of Real-Time {Java}:
Model-Driven Transformation and Synthesis",
journal = j-TECS,
volume = "19",
number = "5",
pages = "31:1--31:26",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391897",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391897",
abstract = "Many applications in emerging scenarios, such as
autonomous vehicles, intelligent robots, and industrial
automation, are safety-critical with strict timing
requirements. However, the development of real-time
systems is error prone and highly dependent \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "31",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Venkataramani:2020:SSD,
author = "Vanchinathan Venkataramani and Aditi Kulkarni and
Tulika Mitra and Li-Shiuan Peh",
title = "{SPECTRUM}: a Software-defined Predictable Many-core
Architecture for {LTE\slash 5G} Baseband Processing",
journal = j-TECS,
volume = "19",
number = "5",
pages = "32:1--32:28",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3400032",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3400032",
abstract = "Wireless communication standards such as Long-term
Evolution (LTE) are rapidly changing to support the
high data-rate of wireless devices. The physical layer
baseband processing has strict real-time deadlines,
especially in the next-generation \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "32",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Reghenzani:2020:DUP,
author = "Federico Reghenzani and Luca Santinelli and William
Fornaciari",
title = "Dealing with Uncertainty in {pWCET} Estimations",
journal = j-TECS,
volume = "19",
number = "5",
pages = "33:1--33:23",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3396234",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3396234",
abstract = "The problem of estimating a tight and safe Worst-Case
Execution Time (WCET), needed for certification in
safety-critical environment, is a challenging problem
for modern embedded systems. A possible solution
proposed in past years is to exploit \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Calderon:2020:GUE,
author = "Alejandro J. Calder{\'o}n and Leonidas Kosmidis and
Carlos F. Nicol{\'a}s and Francisco J. Cazorla and Peio
Onaindia",
title = "{GMAI}: Understanding and Exploiting the Internals of
{GPU} Resource Allocation in Critical Systems",
journal = j-TECS,
volume = "19",
number = "5",
pages = "34:1--34:23",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391896",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391896",
abstract = "Critical real-time systems require strict resource
provisioning in terms of memory and timing. The
constant need for higher performance in these systems
has led industry to recently include GPUs. However, GPU
software ecosystems are by their nature \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2020:CTC,
author = "Chundong Wang and Sudipta Chattopadhyay and Gunavaran
Brihadiswarn",
title = "{Crab-tree}: a Crash Recoverable {B+}-tree Variant for
Persistent Memory with {ARMv8} Architecture",
journal = j-TECS,
volume = "19",
number = "5",
pages = "35:1--35:26",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3396236",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3396236",
abstract = "In recent years, the next-generation non-volatile
memory (NVM) technologies have emerged with DRAM-like
byte addressability and disk-like durability. Computer
architects have proposed to use them to build
persistent memory that blurs the conventional
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "35",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bresch:2020:TXP,
author = "Cyril Bresch and David H{\'e}ly and Roman Lysecky and
St{\'e}phanie Chollet and Ioannis Parissis",
title = "{TrustFlow-X}: a Practical Framework for Fine-grained
Control-flow Integrity in Critical Systems",
journal = j-TECS,
volume = "19",
number = "5",
pages = "36:1--36:26",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3398327",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3398327",
abstract = "This article addresses the challenges of memory safety
in life-critical medical devices. Since the last
decade, healthcare manufacturers have embraced the
Internet of Things, pushing technological innovations
to increase market share. Medical devices, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lukyanov:2020:FVS,
author = "Georgy Lukyanov and Andrey Mokhov and Jakob Lechner",
title = "Formal Verification of Spacecraft Control Programs",
journal = j-TECS,
volume = "19",
number = "5",
pages = "37:1--37:18",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391900",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391900",
abstract = "Verification of correctness of control programs is an
essential task in the development of space electronics;
it is difficult and typically outweighs design and
programming tasks in terms of development hours. This
article presents a verification \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kadiyala:2020:HPC,
author = "Sai Praveen Kadiyala and Pranav Jadhav and Siew-Kei
Lam and Thambipillai Srikanthan",
title = "Hardware Performance Counter-Based Fine-Grained
Malware Detection",
journal = j-TECS,
volume = "19",
number = "5",
pages = "38:1--38:17",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3403943",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3403943",
abstract = "Detection of malicious programs using hardware-based
features has gained prominence recently. The
tamper-resistant hardware metrics prove to be a better
security feature than the high-level software metrics,
which can be easily obfuscated. Hardware \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Stitt:2020:PAI,
author = "Greg Stitt and David Campbell",
title = "{PANDORA}: an Architecture-Independent Parallelizing
Approximation-Discovery Framework",
journal = j-TECS,
volume = "19",
number = "5",
pages = "39:1--39:17",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391899",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391899",
abstract = "In this article, we introduce a parallelizing
approximation-discovery framework, PANDORA, for
automatically discovering application- and
architecture-specialized approximations of provided
code. PANDORA complements existing compilers and
runtime \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Atoofian:2020:ACG,
author = "Ehsan Atoofian",
title = "Approximate Cache in {GPGPUs}",
journal = j-TECS,
volume = "19",
number = "5",
pages = "40:1--40:22",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3407904",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:34:59 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3407904",
abstract = "There is a growing number of application domains
ranging from multimedia to machine learning where a
certain level of inexactness can be tolerated. For
these applications, approximate computing is an
effective technique that trades off some loss in
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shrivastava:2020:ISIb,
author = "Aviral Shrivastava and Jian-Jia Chen and Youtao
Zhang",
title = "Introduction to the Special Issue on Languages,
Compilers, Tools, and Theory of Embedded Systems: {Part
2}",
journal = j-TECS,
volume = "19",
number = "6",
pages = "41:1--41:2",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3417734",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3417734",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hsiao:2020:CHC,
author = "Luke Hsiao and Sen Wu and Nicholas Chiang and
Christopher R{\'e} and Philip Levis",
title = "Creating Hardware Component Knowledge Bases with
Training Data Generation and Multi-task Learning",
journal = j-TECS,
volume = "19",
number = "6",
pages = "42:1--42:26",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391906",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391906",
abstract = "Hardware component databases are vital resources in
designing embedded systems. Since creating these
databases requires hundreds of thousands of hours of
manual data entry, they are proprietary, limited in the
data they provide, and have random data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Samragh:2020:ERB,
author = "Mohammad Samragh and Mojan Javaheripi and Farinaz
Koushanfar",
title = "{EncoDeep}: Realizing Bit-flexible Encoding for Deep
Neural Networks",
journal = j-TECS,
volume = "19",
number = "6",
pages = "43:1--43:29",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391901",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391901",
abstract = "This article proposes EncoDeep, an end-to-end
framework that facilitates encoding, bitwidth
customization, fine-tuning, and implementation of
neural networks on FPGA platforms. EncoDeep
incorporates nonlinear encoding to the computation flow
of neural \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Khan:2020:OTC,
author = "Asif Ali Khan and Norman A. Rink and Fazal Hameed and
Jeronimo Castrillon",
title = "Optimizing Tensor Contractions for Embedded Devices
with Racetrack and {DRAM} Memories",
journal = j-TECS,
volume = "19",
number = "6",
pages = "44:1--44:26",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3396235",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3396235",
abstract = "Tensor contraction is a fundamental operation in many
algorithms with a plethora of applications ranging from
quantum chemistry over fluid dynamics and image
processing to machine learning. The performance of
tensor computations critically depends on \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ahmed:2020:FEE,
author = "Saad Ahmed and Naveed Anwar Bhatti and Muhammad Hamad
Alizai and Junaid Haroon Siddiqui and Luca Mottola",
title = "Fast and Energy-Efficient State Checkpointing for
Intermittent Computing",
journal = j-TECS,
volume = "19",
number = "6",
pages = "45:1--45:27",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391903",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391903",
abstract = "Intermittently powered embedded devices ensure forward
progress of programs through state checkpointing in
non-volatile memory. Checkpointing is, however,
expensive in energy and adds to the execution times. To
minimize this overhead, we present DICE, a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2020:DIA,
author = "Xinyi Li and Lei Zhang and Xipeng Shen",
title = "{DIAC}: an Inter-app Conflicts Detector for Open {IoT}
Systems",
journal = j-TECS,
volume = "19",
number = "6",
pages = "46:1--46:25",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391895",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391895",
abstract = "This article tackles the problem of detecting and
solving potential conflicts among independently
developed apps that are to be installed into an open
Internet-of-Things (IoT) environment. It provides a new
set of definitions and categorizations of the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ahmed:2020:DEC,
author = "Saad Ahmed and Muhammad Nawaz and Abu Bakar and Naveed
Anwar Bhatti and Muhammad Hamad Alizai and Junaid
Haroon Siddiqui and Luca Mottola",
title = "Demystifying Energy Consumption Dynamics in
Transiently powered Computers",
journal = j-TECS,
volume = "19",
number = "6",
pages = "47:1--47:25",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391893",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391893",
abstract = "Transiently powered computers (TPCs) form the
foundation of the battery-less Internet of Things,
using energy harvesting and small capacitors to power
their operation. This kind of power supply is
characterized by extreme variations in supply voltage,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "47",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wade:2020:EIP,
author = "April W. Wade and Prasad A. Kulkarni and Michael R.
Jantz",
title = "Exploring Impact of Profile Data on Code Quality in
the {HotSpot JVM}",
journal = j-TECS,
volume = "19",
number = "6",
pages = "48:1--48:26",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391894",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391894",
abstract = "Managed language virtual machines (VM) rely on dynamic
or just-in-time (JIT) compilation to generate optimized
native code at run-time to deliver high execution
performance. Many VMs and JIT compilers collect profile
data at run-time to enable profile-guided \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Reissmann:2020:RIR,
author = "Nico Reissmann and Jan Christian Meyer and Helge
Bahmann and Magnus Sj{\"a}lander",
title = "{RVSDG}: an Intermediate Representation for Optimizing
Compilers",
journal = j-TECS,
volume = "19",
number = "6",
pages = "49:1--49:28",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391902",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391902",
abstract = "Intermediate Representations (IRs) are central to
optimizing compilers as the way the program is
represented may enhance or limit analyses and
transformations. Suitable IRs focus on exposing the
most relevant information and establish invariants that
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "49",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Latifis:2020:RMC,
author = "Ioannis Latifis and Karthick Parashar and Grigoris
Dimitroulakos and Hans Cappelle and Christakis Lezos
and Konstantinos Masselos and Francky Catthoor",
title = "A Retargetable {MATLAB-to-C} Compiler Exploiting
Custom Instructions and Data Parallelism",
journal = j-TECS,
volume = "19",
number = "6",
pages = "50:1--50:27",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391898",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/matlab.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3391898",
abstract = "This article presents a MATLAB-to-C compiler that
exploits custom instructions present in
state-of-the-art processor architectures and supports
semi-automatic vectorization. A parameterized processor
model is used to describe the target instruction set
architecture to achieve user-friendly retargetability.
Custom instructions are represented via specialized
intrinsic functions in the generated code, which can
then be used as input to any C/C++ compiler supporting
the target processor. In addition, the compiler
supports the generation of data parallel\slash
vectorized code through the introduction of data
packing\slash unpacking statements. The compiler has
been used for code generation targeting ARM and x86
architectures for several benchmarks. The vectorized
code generated by the compiler achieves an average
speedup of 4.1 $ \times $ and 2.7 $ \times $ for packed
fixed and floating point data, respectively, compared
to scalarized code for ARM architecture and an average
speedup of 3.1 $ \times $ and 1.5 $ \times $ for packed
fixed and floating point data, respectively, for x86
architecture. Implementing data parallel instructions
directly in the assembly code would have required a lot
of design effort, and it would not have been sustainable
across evolving platform variants. Thus, the compiler
can be employed to efficiently speed up critical
sections of the target application. The compiler is
therefore potentially employable to raise the design
abstraction and reduce development time for both
embedded and general-purpose applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "50",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Susu:2020:VLA,
author = "Alexandru E. Susu",
title = "A Vector-Length Agnostic Compiler for the {Connex-S}
Accelerator with Scratchpad Memory",
journal = j-TECS,
volume = "19",
number = "6",
pages = "51:1--51:30",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3406536",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:18 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3406536",
abstract = "Compiling sequential C programs for Connex-S, a
competitive, scalable and customizable, wide vector
accelerator for intensive embedded applications with 32
to 4,096 16-bit integer lanes and a limited capacity
local scratchpad memory, is challenging. Our \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "51",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2021:D,
author = "Edward A. Lee",
title = "Determinism",
journal = j-TECS,
volume = "20",
number = "5",
pages = "38:1--38:34",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453652",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3453652",
abstract = "This article is about deterministic models, what they
are, why they are useful, and what their limitations
are. First, the article emphasizes that determinism is
a property of models, not of physical systems. Whether
a model is deterministic or not \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Leon:2021:IPD,
author = "Vasileios Leon and Theodora Paparouni and Evangelos
Petrongonas and Dimitrios Soudris and Kiamal
Pekmestzi",
title = "Improving Power of {DSP} and {CNN} Hardware
Accelerators Using Approximate Floating-point
Multipliers",
journal = j-TECS,
volume = "20",
number = "5",
pages = "39:1--39:21",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3448980",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3448980",
abstract = "Approximate computing has emerged as a promising
design alternative for delivering power-efficient
systems and circuits by exploiting the inherent error
resiliency of numerous applications. The current
article aims to tackle the increased hardware cost of
floating-point multiplication units, which prohibits
their usage in embedded computing. We introduce AFMU
(Approximate Floating-point MUltiplier), an
area/power-efficient family of multipliers, which apply
two approximation techniques in the resource-hungry
mantissa multiplication and can be seamlessly extended
to support dynamic configuration of the approximation
levels via gating signals. AFMU offers large accuracy
configuration margins, provides negligible logic
overhead for dynamic configuration, and detects
unexpected results that may arise due to the
approximations. Our evaluation shows that AFMU delivers
energy gains in the range 3.6\%--53.5\% for
half-precision and 37.2\%--82.4\% for single-precision,
in exchange for mean relative error around
0.05\%--3.33\% and 0.01\%--2.20\%, respectively. In
comparison with state-of-the-art multipliers, AFMU
exhibits up to 4--6 $ \times $ smaller error on average
while delivering more energy-efficient computing. The
evaluation in image processing shows that AFMU provides
sufficient quality of service, i.e., more than 50db
PSNR and near 1 SSIM values, and up to 57.4\% power
reduction. When used in floating-point CNNs, the
accuracy loss is small (or zero), i.e., up to 5.4\% for
MNIST and CIFAR-10, in exchange for up to 63.8\% power
gain.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Garcia:2021:IHG,
author = "Andr{\'e}s Amaya Garc{\'\i}a and David May and Ed
Nutting",
title = "Integrated Hardware Garbage Collection",
journal = j-TECS,
volume = "20",
number = "5",
pages = "40:1--40:25",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3450147",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/csharp.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3450147",
abstract = "Garbage collected programming languages, such as
Python and C\#, have accelerated software development.
These modern languages increase productivity and
software reliability as they provide high-level data
representation and control structures. Modern languages
are widely used in software development for mobile,
desktop, and server devices, but their adoption is
limited in real-time embedded systems.\par
There is clear interest in supporting modern languages
in embedded devices as emerging markets, like the
Internet of Things, demand ever smarter and more
reliable products. Multiple commercial and open-source
projects, such as Zerynth and MicroPython, are
attempting to provide support. But these projects rely
on software garbage collectors that impose high
overheads and introduce unpredictable pauses,
preventing their use in many embedded applications.
These limitations arise from the unsuitability of
conventional processors for performing efficient,
predictable garbage collection.\par
We propose the Integrated Hardware Garbage Collector
(IHGC); a garbage collector tightly coupled with the
processor that runs continuously in the background.
Further, we introduce a static analysis technique to
guarantee that real-time programs are never paused by
the collector. Our design allocates a memory cycle to
the collector when the processor is not using the
memory. The IHGC achieves this by careful division of
collection work into single-memory-access steps that
are interleaved with the processor's memory accesses.
As a result, our collector eliminates run-time
overheads and enables real-time program
analysis.\par
The principles behind the IHGC can be used in
conjunction with existing architectures. For example,
we simulated the IHGC alongside the ARMv6-M
architecture. Compared to a conventional processor, our
experiments indicate that the IHGC offers 1.5--7 times
better performance for programs that rely on garbage
collection. The IHGC delivers the benefits of
garbage-collected languages with real-time performance
but without the complexity and overheads inherent in
software collectors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhou:2021:RAS,
author = "Yuanbin Zhou and Soheil Samii and Petru Eles and Zebo
Peng",
title = "Reliability-aware Scheduling and Routing for Messages
in Time-sensitive Networking",
journal = j-TECS,
volume = "20",
number = "5",
pages = "41:1--41:24",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3458768",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3458768",
abstract = "Time-sensitive Networking (TSN) on Ethernet is a
promising communication technology in the automotive
and industrial automation industries due to its
real-time and high-bandwidth communication
capabilities. Time-triggered scheduling and static
routing \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Valente:2021:CMS,
author = "Giacomo Valente and Tiziana Fanni and Carlo Sau and
Tania {Di Mascio} and Luigi Pomante and Francesca
Palumbo",
title = "A Composable Monitoring System for Heterogeneous
Embedded Platforms",
journal = j-TECS,
volume = "20",
number = "5",
pages = "42:1--42:34",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3461647",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3461647",
abstract = "Advanced computations on embedded devices are nowadays
a must in any application field. Often, to cope with
such a need, embedded systems designers leverage on
complex heterogeneous reconfigurable platforms that
offer high performance, thanks to the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Akdur:2021:SGI,
author = "Deniz Akdur",
title = "Skills Gaps in the Industry: Opinions of Embedded
Software Practitioners",
journal = j-TECS,
volume = "20",
number = "5",
pages = "43:1--43:39",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3463340",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3463340",
abstract = "Many practitioners in the software-intensive embedded
industry often face difficulties after beginning their
careers due to misalignment of the skills learned at
the university with what is required in the workplace.
Companies spend crucial resources to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Aligholipour:2021:TTA,
author = "Rashid Aligholipour and Mohammad Baharloo and Behnam
Farzaneh and Meisam Abdollahi and Ahmad Khonsari",
title = "{TAMA}: Turn-aware Mapping and Architecture --- a
Power-efficient Network-on-Chip Approach",
journal = j-TECS,
volume = "20",
number = "5",
pages = "44:1--44:24",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3462700",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3462700",
abstract = "Nowadays, static power consumption in chip
multiprocessor (CMP) is the most crucial concern of
chip designers. Power-gating is an effective approach
to mitigate static power consumption particularly in
low utilization. Network-on-Chip (NoC) as the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Roy:2021:SQL,
author = "Sanjit Kumar Roy and Rajesh Devaraj and Arnab Sarkar
and Debabrata Senapati",
title = "{SLAQA}: Quality-level Aware Scheduling of Task Graphs
on Heterogeneous Distributed Systems",
journal = j-TECS,
volume = "20",
number = "5",
pages = "45:1--45:31",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3462776",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3462776",
abstract = "Continuous demands for higher performance and
reliability within stringent resource budgets are
driving a shift from homogeneous to heterogeneous
processing platforms for the implementation of today's
cyber-physical systems (CPSs). These CPSs are
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Modekurthy:2021:DRT,
author = "Venkata P. Modekurthy and Abusayeed Saifullah and
Sanjay Madria",
title = "A Distributed Real-time Scheduling System for
Industrial Wireless Networks",
journal = j-TECS,
volume = "20",
number = "5",
pages = "46:1--46:28",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3464429",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3464429",
abstract = "The concept of Industry 4.0 introduces the unification
of industrial Internet-of-Things (IoT), cyber physical
systems, and data-driven business modeling to improve
production efficiency of the factories. To ensure high
production efficiency, Industry \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Forsberg:2021:PEM,
author = "Bj{\"o}rn Forsberg and Marco Solieri and Marko
Bertogna and Luca Benini and Andrea Marongiu",
title = "The Predictable Execution Model in Practice: Compiling
Real Applications for {COTS} Hardware",
journal = j-TECS,
volume = "20",
number = "5",
pages = "47:1--47:25",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3465370",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3465370",
abstract = "Adoption of multi- and many-core processors in
real-time systems has so far been slowed down, if not
totally barred, due to the difficulty in providing
analytical real-time guarantees on worst-case execution
times. The Predictable Execution Model (PREM)
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "47",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Maity:2021:SSO,
author = "Biswadip Maity and Bryan Donyanavard and Anmol
Surhonne and Amir Rahmani and Andreas Herkersdorf and
Nikil Dutt",
title = "{SEAMS}: Self-Optimizing Runtime Manager for
Approximate Memory Hierarchies",
journal = j-TECS,
volume = "20",
number = "5",
pages = "48:1--48:26",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3466875",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3466875",
abstract = "Memory approximation techniques are commonly limited
in scope, targeting individual levels of the memory
hierarchy. Existing approximation techniques for a full
memory hierarchy determine optimal configurations at
design-time provided a goal and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Witterauf:2021:SLC,
author = "Michael Witterauf and Dominik Walter and Frank Hannig
and J{\"u}rgen Teich",
title = "Symbolic Loop Compilation for Tightly Coupled
Processor Arrays",
journal = j-TECS,
volume = "20",
number = "5",
pages = "49:1--49:31",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3466897",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue Aug 10 13:35:00 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3466897",
abstract = "Tightly Coupled Processor Arrays (TCPAs), a class of
massively parallel loop accelerators, allow
applications to offload computationally expensive loops
for improved performance and energy efficiency. To
achieve these two goals, executing a loop on a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "49",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bur:2021:WCE,
author = "M{\'a}rton B{\'u}r and Krist{\'o}f Marussy and Brett
H. Meyer and D{\'a}niel Varr{\'o}",
title = "Worst-case Execution Time Calculation for Query-based
Monitors by Witness Generation",
journal = j-TECS,
volume = "20",
number = "6",
pages = "107:1--107:36",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3471904",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3471904",
abstract = "Runtime monitoring plays a key role in the assurance
of modern intelligent cyber-physical systems, which are
frequently data-intensive and safety-critical. While
graph queries can serve as an expressive yet formally
precise specification language to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "107",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Park:2021:IML,
author = "Jurn-Gyu Park and Nikil Dutt and Sung-Soo Lim",
title = "An Interpretable Machine Learning Model Enhanced
Integrated {CPU--GPU DVFS} Governor",
journal = j-TECS,
volume = "20",
number = "6",
pages = "108:1--108:28",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3470974",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3470974",
abstract = "Modern heterogeneous CPU-GPU-based mobile
architectures, which execute intensive mobile
gaming/graphics applications, use software governors to
achieve high performance with energy-efficiency.
However, existing governors typically utilize simple
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "108",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ray:2021:HAS,
author = "Kaustabha Ray and Ansuman Banerjee",
title = "Horizontal Auto-Scaling for Multi-Access Edge
Computing Using Safe Reinforcement Learning",
journal = j-TECS,
volume = "20",
number = "6",
pages = "109:1--109:33",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3475991",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3475991",
abstract = "Multi-Access Edge Computing (MEC) has emerged as a
promising new paradigm allowing low latency access to
services deployed on edge servers to avert network
latencies often encountered in accessing cloud
services. A key component of the MEC environment is
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "109",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Aydin:2021:HSC,
author = "Furkan Aydin and Aydin Aysu and Mohit Tiwari and
Andreas Gerstlauer and Michael Orshansky",
title = "Horizontal Side-Channel Vulnerabilities of
Post-Quantum Key Exchange and Encapsulation Protocols",
journal = j-TECS,
volume = "20",
number = "6",
pages = "110:1--110:22",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3476799",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3476799",
abstract = "Key exchange protocols and key encapsulation
mechanisms establish secret keys to communicate digital
information confidentially over public channels.
Lattice-based cryptography variants of these protocols
are promising alternatives given their quantum-
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "110",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Giraldo:2021:HAE,
author = "J. S. P. Giraldo and Marian Verhelst",
title = "Hardware Acceleration for Embedded Keyword Spotting:
Tutorial and Survey",
journal = j-TECS,
volume = "20",
number = "6",
pages = "111:1--111:25",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3474365",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3474365",
abstract = "In recent years, Keyword Spotting (KWS) has become a
crucial human-machine interface for mobile devices,
allowing users to interact more naturally with their
gadgets by leveraging their own voice. Due to privacy,
latency and energy requirements, the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "111",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{RibeiroDaSilva:2021:MCH,
author = "Junio Cezar {Ribeiro Da Silva} and Lorena Le{\~a}o and
Vinicius Petrucci and Abdoulaye Gamati{\'e} and
Fernando Magno {Quint{\~a}o Pereira}",
title = "Mapping Computations in Heterogeneous Multicore
Systems with Statistical Regression on Program Inputs",
journal = j-TECS,
volume = "20",
number = "6",
pages = "112:1--112:35",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3478288",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3478288",
abstract = "A hardware configuration is a set of processors and
their frequency levels in a multicore heterogeneous
system. This article presents a compiler-based
technique to match functions with hardware
configurations. Such a technique consists of using
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "112",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2021:VSH,
author = "Yu Wang and Nima Roohi and Matthew West and Mahesh
Viswanathan and Geir E. Dullerud",
title = "Verifying Stochastic Hybrid Systems with Temporal
Logic Specifications via Model Reduction",
journal = j-TECS,
volume = "20",
number = "6",
pages = "113:1--113:27",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3483380",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3483380",
abstract = "We present a scalable methodology to verify stochastic
hybrid systems for inequality linear temporal logic
(iLTL) or inequality metric interval temporal logic
(iMITL). Using the Mori--Zwanzig reduction method, we
construct a finite-state Markov chain \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "113",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Servais:2021:ACR,
author = "Jason Servais and Ehsan Atoofian",
title = "Adaptive Computation Reuse for Energy-Efficient
Training of Deep Neural Networks",
journal = j-TECS,
volume = "20",
number = "6",
pages = "114:1--114:24",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487025",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3487025",
abstract = "In recent years, Deep Neural Networks (DNNs) have been
deployed into a diverse set of applications from voice
recognition to scene generation mostly due to their
high-accuracy. DNNs are known to be computationally
intensive applications, requiring a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "114",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Saini:2021:IFC,
author = "Kanika Saini and Sheetal Kalra and Sandeep K. Sood",
title = "{IoT}-Fog-Cloud Centric Earthquake Monitoring and
Prediction",
journal = j-TECS,
volume = "20",
number = "6",
pages = "115:1--115:26",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487942",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Dec 10 11:17:19 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3487942",
abstract = "Earthquakes are among the most inevitable natural
catastrophes. The uncertainty about the severity of the
earthquake has a profound effect on the burden of
disaster and causes massive economic and societal
losses. Although unpredictable, it can be \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "115",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2022:ISIa,
author = "Yuan-Hao Chang and Jalil Boukhobza and Song Han",
title = "Introduction to the Special Issue on Memory and
Storage Systems for Embedded and {IoT} Applications",
journal = j-TECS,
volume = "21",
number = "1",
pages = "1:1--1:4",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3505283",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Feb 16 14:00:33 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3505283",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Manohar:2022:CUC,
author = "Sheel Sindhu Manohar and Sparsh Mittal and Hemangee K.
Kapoor",
title = "{CORIDOR}: Using {COherence} and {TempoRal LocalIty}
to Mitigate Read Disturbance {ErrOR} in {STT--RAM}
Caches",
journal = j-TECS,
volume = "21",
number = "1",
pages = "2:1--2:24",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3484493",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Feb 16 14:00:33 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3484493",
abstract = "In the deep sub-micron region, ``spin-transfer torque
RAM'' (STT-RAM) suffers from ``read-disturbance error''
(RDE), whereby a read operation disturbs the stored
data. Mitigation of RDE requires restore operations,
which imposes latency and energy penalties. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Marinelli:2022:MES,
author = "Tommaso Marinelli and Jos{\'e} Ignacio G{\'o}mez
P{\'e}rez and Christian Tenllado and Manu Komalan and
Mohit Gupta and Francky Catthoor",
title = "Microarchitectural Exploration of {STT--MRAM}
Last-level Cache Parameters for Energy-efficient
Devices",
journal = j-TECS,
volume = "21",
number = "1",
pages = "3:1--3:20",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3490391",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Feb 16 14:00:33 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3490391",
abstract = "As the technology scaling advances, limitations of
traditional memories in terms of density and energy
become more evident. Modern caches occupy a large part
of a CPU physical size and high static leakage poses a
limit to the overall efficiency of the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wittig:2022:AES,
author = "Robert Wittig and Philipp Schulz and Emil Matus and
Gerhard P. Fettweis",
title = "Accurate Estimation of Service Rates in Interleaved
Scratchpad Memory Systems",
journal = j-TECS,
volume = "21",
number = "1",
pages = "4:1--4:15",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457171",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Feb 16 14:00:33 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3457171",
abstract = "The prototyping of embedded platforms demands rapid
exploration of multi-dimensional parameter sets.
Especially the design of the memory system is essential
to guarantee high utilization while reducing conflicts
at the same time. To aid the design process, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hakert:2022:SMR,
author = "Christian Hakert and Kuan-Hsun Chen and Horst
Schirmeier and Lars Bauer and Paul R. Genssler and
Georg von der Br{\"u}ggen and Hussam Amrouch and
J{\"o}rg Henkel and Jian-Jia Chen",
title = "Software-Managed Read and Write Wear-Leveling for
Non-Volatile Main Memory",
journal = j-TECS,
volume = "21",
number = "1",
pages = "5:1--5:24",
month = jan,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3483839",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Feb 16 14:00:33 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3483839",
abstract = "In-memory wear-leveling has become an important
research field for emerging non-volatile main memories
over the past years. Many approaches in the literature
perform wear-leveling by making use of special
hardware. Since most non-volatile memories only
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Asifuzzaman:2022:PPE,
  author          = {Kazi Asifuzzaman and Rommel {S{\'a}nchez Verdejo} and
                     Petar Radojkovi{\'c}},
  title           = {Performance and Power Estimation of {STT--MRAM} Main
                     Memory with Reliable System-level Simulation},
  journal         = j-TECS,
  volume          = {21},
  number          = {1},
  pages           = {6:1--6:25},
  month           = jan,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3476838},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Feb 16 14:00:33 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3476838},
  abstract        = {It is questionable whether DRAM will continue to scale
                     and will meet the needs of next-generation systems.
                     Therefore, significant effort is invested in research
                     and development of novel memory technologies. One of the
                     candidates for next-generation memory \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {6},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Shin:2022:EED,
  author          = {Dongsuk Shin and Hakbeom Jang and Kiseok Oh and Jae W.
                     Lee},
  title           = {An Energy-Efficient {DRAM} Cache Architecture for Mobile
                     Platforms With {PCM}-Based Main Memory},
  journal         = j-TECS,
  volume          = {21},
  number          = {1},
  pages           = {7:1--7:22},
  month           = jan,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3451995},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Feb 16 14:00:33 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3451995},
  abstract        = {A long battery life is a first-class design objective
                     for mobile devices, and main memory accounts for a major
                     portion of total energy consumption. Moreover, the
                     energy consumption from memory is expected to increase
                     further with ever-growing demands for \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {7},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Wen:2022:SHD,
  author          = {Fei Wen and Mian Qin and Paul Gratz and Narasimha Reddy},
  title           = {Software Hint-Driven Data Management for Hybrid Memory
                     in Mobile Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {1},
  pages           = {8:1--8:18},
  month           = jan,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3494536},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Feb 16 14:00:33 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3494536},
  abstract        = {Hybrid memory systems, comprised of emerging
                     non-volatile memory (NVM) and DRAM, have been proposed
                     to address the growing memory demand of current mobile
                     applications. Recently emerging NVM technologies, such
                     as phase-change memories (PCM), memristor, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {8},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Zou:2022:DHA,
  author          = {Yu Zou and Amro Awad and Mingjie Lin},
  title           = {{DirectNVM}: Hardware-accelerated {NVMe SSDs} for
                     High-performance Embedded Computing},
  journal         = j-TECS,
  volume          = {21},
  number          = {1},
  pages           = {9:1--9:24},
  month           = jan,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3463911},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Feb 16 14:00:33 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3463911},
  abstract        = {With data-intensive artificial intelligence (AI) and
                     machine learning (ML) applications rapidly surging,
                     modern high-performance embedded systems, with
                     heterogeneous computing resources, critically demand
                     low-latency and high-bandwidth data communication.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {9},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Missimer:2022:TRT,
  author          = {Katherine Missimer and Manos Athanassoulis and Richard
                     West},
  title           = {{Telomere}: Real-Time {NAND} Flash Storage},
  journal         = j-TECS,
  volume          = {21},
  number          = {1},
  pages           = {10:1--10:24},
  month           = jan,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3479157},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Feb 16 14:00:33 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3479157},
  abstract        = {Modern solid-state disks achieve high data transfer
                     rates due to their massive internal parallelism.
                     However, out-of-place updates for flash memory incur
                     garbage collection costs when valid data needs to be
                     copied during space reclamation. The root cause of this
                     extra cost is that solid-state disks are not always able
                     to accurately determine data lifetime and group together
                     data that expires before the space needs to be
                     reclaimed. Real-time systems found in autonomous
                     vehicles, industrial control systems, and assembly-line
                     robots store data from hundreds of sensors and often
                     have predictable data lifetimes. These systems require
                     guaranteed high storage bandwidth for read and write
                     operations by mission-critical real-time tasks. In this
                     article, we depart from the traditional block device
                     interface to guarantee the high throughput needed to
                     process large volumes of data. Using data lifetime
                     information from the application layer, our proposed
                     real-time design, called Telomere, is able to
                     intelligently lay out data in NAND flash memory and
                     eliminate valid page copies during garbage collection.
                     Telomere's real-time admission control is able to
                     guarantee tasks their required read and write operations
                     within their periods. Under randomly generated tasksets
                     containing 500 tasks, Telomere achieves 30\% higher
                     throughput with a 5\% storage cost compared to
                     pre-existing techniques.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Zou:2022:APS,
  author          = {Yu Zou and Kazi Abu Zubair and Mazen Alwadi and Rakin
                     Muhammad Shadab and Sanjay Gandham and Amro Awad and
                     Mingjie Lin},
  title           = {{ARES}: Persistently Secure Non-Volatile Memory with
                     Processor-transparent and Hardware-friendly Integrity
                     Verification and Metadata Recovery},
  journal         = j-TECS,
  volume          = {21},
  number          = {1},
  pages           = {11:1--11:32},
  month           = jan,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3492735},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Feb 16 14:00:33 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3492735},
  abstract        = {Emerging byte-addressable Non-Volatile Memory (NVM)
                     technology, although promising superior memory density
                     and ultra-low energy consumption, poses unique
                     challenges to achieving persistent data privacy and
                     computing security, both of which are critically
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {11},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Eldstaal-Ahrens:2022:CCL,
  author          = {Albin Eldst{\aa}l-Ahrens and Angelos Arelakis and
                     Ioannis Sourdis},
  title           = {{L$^2$C}: Combining Lossy and Lossless Compression on
                     Memory and {I/O}},
  journal         = j-TECS,
  volume          = {21},
  number          = {1},
  pages           = {12:1--12:27},
  month           = jan,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3481641},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Feb 16 14:00:33 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3481641},
  abstract        = {In this article, we introduce L$^2$C, a hybrid
                     lossy/lossless compression scheme applicable both to the
                     memory subsystem and I/O traffic of a processor chip.
                     L$^2$C employs general-purpose lossless compression and
                     combines it with state-of-the-art lossy compression to
                     achieve compression ratios up to 16:1 and to improve the
                     utilization of chip's bandwidth resources. Compressing
                     memory traffic yields lower memory access time,
                     improving system performance, and energy efficiency.
                     Compressing I/O traffic offers several benefits for
                     resource-constrained systems, including more efficient
                     storage and networking. We evaluate L$^2$C as a memory
                     compressor in simulation with a set of
                     approximation-tolerant applications. L$^2$C improves
                     baseline execution time by an average of 50\% and total
                     system energy consumption by 16\%. Compared to the lossy
                     and lossless current state-of-the-art memory compression
                     approaches, L$^2$C improves execution time by 9\% and
                     26\%, respectively, and reduces system energy costs by
                     3\% and 5\%, respectively. I/O compression efficacy is
                     evaluated using a set of real-life datasets. L$^2$C
                     achieves compression ratios of up to 10.4:1 for a single
                     dataset and on average about 4:1, while introducing no
                     more than 0.4\% error.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {12},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Nie:2022:HRA,
  author          = {Lanshun Nie and Chenghao Fan and Shuang Lin and Li Zhang
                     and Yajuan Li and Jing Li},
  title           = {Holistic Resource Allocation Under Federated Scheduling
                     for Parallel Real-time Tasks},
  journal         = j-TECS,
  volume          = {21},
  number          = {1},
  pages           = {13:1--13:29},
  month           = jan,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3489467},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Feb 16 14:00:33 MST 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3489467},
  abstract        = {With the technology trend of hardware and workload
                     consolidation for embedded systems and the rapid
                     development of edge computing, there has been increasing
                     interest in supporting parallel real-time tasks to
                     better utilize the multi-core platforms while \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {13},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Minakova:2022:SBR,
  author          = {Svetlana Minakova and Dolly Sapra and Todor Stefanov and
                     Andy D. Pimentel},
  title           = {Scenario Based Run-Time Switching for Adaptive
                     {CNN}-Based Applications at the Edge},
  journal         = j-TECS,
  volume          = {21},
  number          = {2},
  pages           = {14:1--14:33},
  month           = mar,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3488718},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:59:57 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3488718},
  abstract        = {Convolutional Neural Networks (CNNs) are biologically
                     inspired computational models that are at the heart of
                     many modern computer vision and natural language
                     processing applications. Some of the CNN-based
                     applications are executed on mobile and embedded
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {14},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Chen:2022:PRA,
  author          = {Xing Chen and Umit Ogras and Chaitali Chakrabarti},
  title           = {Probabilistic Risk-Aware Scheduling with Deadline
                     Constraint for Heterogeneous {SoCs}},
  journal         = j-TECS,
  volume          = {21},
  number          = {2},
  pages           = {15:1--15:27},
  month           = mar,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3489409},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:59:57 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3489409},
  abstract        = {Hardware Trojans can compromise System-on-Chip (SoC)
                     performance. Protection schemes implemented to combat
                     these threats cannot guarantee 100\% detection rate and
                     may also introduce performance overhead. This paper
                     defines the risk of running a job on an \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {15},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Dong:2022:EEA,
  author          = {Jiankuo Dong and Fangyu Zheng and Jingqiang Lin and Zhe
                     Liu and Fu Xiao and Guang Fan},
  title           = {{EC-ECC}: Accelerating Elliptic Curve Cryptography for
                     Edge Computing on Embedded {GPU TX2}},
  journal         = j-TECS,
  volume          = {21},
  number          = {2},
  pages           = {16:1--16:25},
  month           = mar,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3492734},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:59:57 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3492734},
  abstract        = {Driven by artificial intelligence and computer vision
                     industries, Graphics Processing Units (GPUs) are now
                     rapidly achieving extraordinary computing power. In
                     particular, the NVIDIA Tegra K1/X1/X2 embedded GPU
                     platforms, which are also treated as edge \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {16},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Biswas:2022:PNC,
  author          = {Arnab Kumar Biswas and Biplab Sikdar},
  title           = {Protecting Network-on-Chip Intellectual Property Using
                     Timing Channel Fingerprinting},
  journal         = j-TECS,
  volume          = {21},
  number          = {2},
  pages           = {17:1--17:21},
  month           = mar,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3495565},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:59:57 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3495565},
  abstract        = {The theft of Intellectual property (IP) is a serious
                     security threat for all businesses that are involved in
                     the creation of IP. In this article, we consider such
                     attacks against IP for Network-on-Chip (NoC) that are
                     commonly used as a popular on-chip \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {17},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Liao:2022:RRS,
  author          = {Jianwei Liao and Jun Li and Mingwang Zhao and Zhibing
                     Sha and Zhigang Cai},
  title           = {Read Refresh Scheduling and Data Reallocation against
                     Read Disturb in {SSDs}},
  journal         = j-TECS,
  volume          = {21},
  number          = {2},
  pages           = {18:1--18:27},
  month           = mar,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3495254},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:59:57 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3495254},
  abstract        = {Read disturb is a circuit-level noise in flash-based
                     Solid-State Drives (SSDs), induced by intensive read
                     requests, which may result in unexpected read errors.
                     The approach of read refresh (RR) is commonly adopted to
                     mitigate its negative effects by \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {18},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Hong:2022:EGE,
  author          = {Ziyang Hong and C. Patrick Yue},
  title           = {{Efficient-Grad}: Efficient Training Deep Convolutional
                     Neural Networks on Edge Devices with Gradient
                     Optimizations},
  journal         = j-TECS,
  volume          = {21},
  number          = {2},
  pages           = {19:1--19:24},
  month           = mar,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3504034},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:59:57 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3504034},
  abstract        = {With the prospering of mobile devices, the distributed
                     learning approach, enabling model training with
                     decentralized data, has attracted great interest from
                     researchers. However, the lack of training capability
                     for edge devices significantly limits the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Zhao:2022:MSM,
  author          = {Qingling Zhao and Mengfei Qu and Zonghua Gu and Haibo
                     Zeng},
  title           = {Minimizing Stack Memory for Partitioned
                     Mixed-criticality Scheduling on Multiprocessor
                     Platforms},
  journal         = j-TECS,
  volume          = {21},
  number          = {2},
  pages           = {20:1--20:30},
  month           = mar,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3506703},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:59:57 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3506703},
  abstract        = {A Mixed-Criticality System (MCS) features the
                     integration of multiple subsystems that are subject to
                     different levels of safety certification on a shared
                     hardware platform. In cost-sensitive application domains
                     such as automotive E/E systems, it is \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Chang:2022:ISIb,
  author          = {Yuan-Hao Chang and Jalil Boukhobza and Song Han},
  title           = {Introduction to the Special Issue on Memory and Storage
                     Systems for Embedded and {IoT} Applications: {Part 2}},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {21:1--21:2},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3531707},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3531707},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Gupta:2022:SLC,
  author          = {Saransh Gupta and Behnam Khaleghi and Sahand Salamat and
                     Justin Morris and Ranganathan Ramkumar and Jeffrey Yu
                     and Aniket Tiwari and Jaeyoung Kang and Mohsen Imani and
                     Baris Aksanli and Tajana Simuni{\'c} Rosing},
  title           = {{Store-n-Learn}: Classification and Clustering with
                     Hyperdimensional Computing across Flash Hierarchy},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {22:1--22:25},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3503541},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3503541},
  abstract        = {Processing large amounts of data, especially in learning
                     algorithms, poses a challenge for current embedded
                     computing systems. Hyperdimensional (HD) computing (HDC)
                     is a brain-inspired computing paradigm that works with
                     high-dimensional vectors called \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Rrushi:2022:PDP,
  author          = {Julian L. Rrushi},
  title           = {Physics-Driven Page Fault Handling for Customized
                     Deception against {CPS} Malware},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {23:1--23:36},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3502742},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3502742},
  abstract        = {Malware crafted to attack cyber-physical systems such as
                     the electrical power grid have a physics-centric
                     nucleus. Cyber-physical systems malware understand
                     physics and hence use their knowledge to guide how they
                     initiate physical damage on a compromised \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {23},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Lin:2022:DRR,
  author          = {Wei-Ting Lin and Hsiang-Yun Cheng and Chia-Lin Yang and
                     Meng-Yao Lin and Kai Lien and Han-Wen Hu and Hung-Sheng
                     Chang and Hsiang-Pang Li and Meng-Fan Chang and Yen-Ting
                     Tsou and Chin-Fu Nien},
  title           = {{DL-RSIM}: a Reliability and Deployment Strategy
                     Simulation Framework for {ReRAM}-based {CNN}
                     Accelerators},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {24:1--24:29},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3507639},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3507639},
  abstract        = {Memristor-based deep learning accelerators provide a
                     promising solution to improve the energy efficiency of
                     neuromorphic computing systems. However, the electrical
                     properties and crossbar structure of memristors make
                     these accelerators error-prone. In \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {24},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Wei:2022:SBD,
  author          = {Qian Wei and Bingzhe Li and Wanli Chang and Zhiping Jia
                     and Zhaoyan Shen and Zili Shao},
  title           = {A Survey of Blockchain Data Management Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {25:1--25:28},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3502741},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3502741},
  abstract        = {Blockchain has been widely deployed in various fields,
                     such as finance, education, and public services.
                     Blockchain has decentralized mechanisms with persistency
                     and auditability and runs as an immutable distributed
                     ledger, where transactions are jointly \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Bai:2022:FCW,
  author          = {Zhenyu Bai and Hugues Cass{\'e} and Marianne {De
                     Michiel} and Thomas Carle and Christine Rochange},
  title           = {A Framework for Calculating {WCET} Based on Execution
                     Decision Diagrams},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {26:1--26:26},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3476879},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3476879},
  abstract        = {Due to the dynamic behaviour of acceleration mechanisms
                     such as caches and branch predictors, static Worst-case
                     Execution Time (WCET) analysis methods tend to scale
                     poorly to modern hardware architectures. As a result, a
                     trade-off must be found between \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {26},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Song:2022:DDB,
  author          = {Shihao Song and Harry Chong and Adarsha Balaji and Anup
                     Das and James Shackleford and Nagarajan Kandasamy},
  title           = {{DFSynthesizer}: Dataflow-based Synthesis of Spiking
                     Neural Networks to Neuromorphic Hardware},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {27:1--27:35},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3479156},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3479156},
  abstract        = {Spiking Neural Networks (SNNs) are an emerging
                     computation model that uses event-driven activation and
                     bio-inspired learning algorithms. SNN-based machine
                     learning programs are typically executed on tile-based
                     neuromorphic hardware platforms, where each \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Xiao:2022:CIA,
  author          = {Jun Xiao and Yixian Shen and Andy D. Pimentel},
  title           = {Cache Interference-aware Task Partitioning for
                     Non-preemptive Real-time Multi-core Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {28:1--28:28},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3487581},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3487581},
  abstract        = {Shared caches in multi-core processors introduce serious
                     difficulties in providing guarantees on the real-time
                     properties of embedded software due to the interaction
                     and the resulting contention in the shared caches. Prior
                     work has studied the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Ullah:2022:ADA,
  author          = {Salim Ullah and Siva Satyendra Sahoo and Nemath Ahmed
                     and Debabrata Chaudhury and Akash Kumar},
  title           = {{AppAxO}: Designing Application-specific Approximate
                     Operators for {FPGA}-based Embedded Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {29:1--29:31},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3513262},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3513262},
  abstract        = {Approximate arithmetic operators, such as adders and
                     multipliers, are increasingly used to satisfy the energy
                     and performance requirements of resource-constrained
                     embedded systems. However, most of the available
                     approximate operators have an application-agnostic
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {29},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Lin:2022:HEI,
  author          = {Yi-Syuan Lin and Yu-Pei Liang and Tseng-Yi Chen and
                     Yuan-Hao Chang and Shuo-Han Chen and Hsin-Wen Wei and
                     Wei-Kuan Shih},
  title           = {How to Enable Index Scheme for Reducing the Writing Cost
                     of {DNA} Storage on Insertion and Deletion},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {30:1--30:25},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3516482},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3516482},
  abstract        = {Recently, the requirement of storing digital data has
                     been growing rapidly; however, the conventional storage
                     medium cannot satisfy these huge demands. Fortunately,
                     thanks to biological technology development, storing
                     digital data into deoxyribonucleic \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {30},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Raj:2022:RMV,
  author          = {Pani Prithvi Raj and Pakala Akhil Reddy and Nitin
                     Chandrachoodan},
  title           = {Reduced Memory {Viterbi} Decoding for
                     Hardware-accelerated Speech Recognition},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {31:1--31:18},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3510028},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3510028},
  abstract        = {Large Vocabulary Continuous Speech Recognition systems
                     require Viterbi searching through a large state space to
                     find the most probable sequence of phonemes that led to
                     a given sound sample. This needs storing and updating of
                     a large Active State List (ASL). \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {31},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Desai:2022:CLR,
  author          = {Harsh Desai and Matteo Nardello and Davide Brunelli and
                     Brandon Lucia},
  title           = {{Camaroptera}: a Long-range Image Sensor with Local
                     Inference for Remote Sensing Applications},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {32:1--32:25},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3510850},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3510850},
  abstract        = {Batteryless image sensors present an opportunity for
                     long-life, long-range sensor deployments that require
                     zero maintenance, and have low cost. Such deployments
                     are critical for enabling remote sensing applications,
                     e.g., instrumenting national highways, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {32},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Mao:2022:TEA,
  author          = {Jiachen Mao and Qing Yang and Ang Li and Kent W. Nixon
                     and Hai Li and Yiran Chen},
  title           = {Toward Efficient and Adaptive Design of Video Detection
                     System with Deep Neural Networks},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {33:1--33:21},
  month           = may,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3484946},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3484946},
  abstract        = {In the past decade, Deep Neural Networks (DNNs), e.g.,
                     Convolutional Neural Networks, achieved human-level
                     performance in vision tasks such as object
                     classification and detection. However, DNNs are known to
                     be computationally expensive and thus hard to be
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {33},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@article{Chen:2022:SRT,
  author          = {Cong Chen and Zhong Hong and Jian-Min Jiang},
  title           = {Scheduling in Real-Time Mobile Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {3},
  pages           = {34:1--34:36},
  month           = may,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3517747},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jul 20 06:57:46 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3517747},
  abstract        = {To guarantee the safety and security of a real-time
                     mobile system such as an intelligent transportation
                     system, it is necessary to model and analyze its
                     behaviors prior to actual development. In particular,
                     the mobile objects in such systems must be \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {34},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Trajkovic:2022:PMA,
  author          = {Jelena Trajkovic and Sara Karimi and Samantha Hangsan
                     and Wenlu Zhang},
  title           = {Prediction Modeling for Application-Specific
                     Communication Architecture Design of Optical {NoC}},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {35:1--35:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520241},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520241},
  abstract        = {Multi-core systems-on-chip are becoming
                     state-of-the-art. Therefore, there is a need for a fast
                     and energy-efficient interconnect to take full
                     advantage of the computational capabilities.
                     Integration of silicon photonics with a traditional
                     electrical \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {35},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Krishnan:2022:BCS,
  author          = {Archanaa S. Krishnan and Patrick Schaumont},
  title           = {Benchmarking and Configuring Security Levels in
                     Intermittent Computing},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {36:1--36:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3522748},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3522748},
  abstract        = {Intermittent computing derives its name from the
                     intermittent character of the power source used to
                     drive the computing, typically an energy harvester of
                     ambient energy sources. Intermittent computing is
                     characterized by frequent transitions between the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {36},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Huang:2022:HFY,
  author          = {Shihua Huang and Luc Waeijen and Henk Corporaal},
  title           = {How Flexible is Your Computing System?},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {37:1--37:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3524861},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3524861},
  abstract        = {In literature, computer architectures are frequently
                     claimed to be highly flexible, typically implying the
                     existence of trade-offs between flexibility and
                     performance or energy efficiency. Processor
                     flexibility, however, is not very sharply defined, and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {37},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Isuwa:2022:QMQ,
  author          = {Samuel Isuwa and Somdip Dey and Andre P. Ortega and
                     Amit Kumar Singh and Bashir M. Al-Hashimi and Geoff V.
                     Merrett},
  title           = {{QUAREM}: Maximising {QoE} Through Adaptive Resource
                     Management in Mobile {MPSoC} Platforms},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {38:1--38:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3526116},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3526116},
  abstract        = {Heterogeneous multi-processor system-on-chip (MPSoC)
                     smartphones are required to offer increasing
                     performance and user quality-of-experience (QoE),
                     despite comparatively slow advances in battery
                     technology. Approaches to balance instantaneous power
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {38},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Chen:2022:ARF,
  author          = {Yanfeng Chen and Tianyu Zhang and Fanxin Kong and Lin
                     Zhang and Qingxu Deng},
  title           = {Attack-resilient Fusion of Sensor Data with Uncertain
                     Delays},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {39:1--39:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3532181},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3532181},
  abstract        = {Malicious attackers may disrupt the safety of
                     autonomous systems through compromising sensors to feed
                     wrong measurements to the controller. This article
                     proposes attack-resilient sensor fusion that combines
                     local sensor readings and shared sensing \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {39},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{France-Pillois:2022:SAR,
  author          = {Maxime France-Pillois and Abdoulaye Gamati{\'e} and
                     Gilles Sassatelli},
  title           = {A Segmented Adaptive Router for Near
                     Energy-Proportional Networks-on-Chip},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {40:1--40:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3529106},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3529106},
  abstract        = {A Network-on-Chip (NoC) is an essential component of a
                     chip multiprocessor (CMP) which however contributes to
                     a large fraction of system energy. The unpredictability
                     of traffic across a NoC frequently involves an
                     expensive over-sizing of NoC resources \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {40},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Mishra:2022:SCF,
  author          = {Tanmaya Mishra and Thidapat Chantem and Ryan Gerdes},
  title           = {Survey of Control-flow Integrity Techniques for
                     Real-time Embedded Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {41:1--41:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3538275},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3538275},
  abstract        = {Computing systems, including real-time embedded
                     systems, are becoming increasingly connected to allow
                     for more advanced and safer operation. Such embedded
                     systems are also often resource-constrained, for
                     example, with lower processing capabilities \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {41},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Wang:2022:RID,
  author          = {Tse-Yuan Wang and Chun-Feng Wu and Che-Wei Tsao and
                     Yuan-Hao Chang and Tei-Wei Kuo and Xue Liu},
  title           = {Rethinking the Interactivity of {OS} and Device Layers
                     in Memory Management},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {42:1--42:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3530876},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3530876},
  abstract        = {In the big data era, a huge number of services has
                     placed a fast-growing demand on the capacity of
                     DRAM-based main memory. However, due to the high
                     hardware cost and serious leakage power/energy
                     consumption, the growth rate of DRAM capacity cannot
                     meet \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {42},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Ouyang:2022:WWF,
  author          = {Xiangzhen Ouyang and Yian Zhu},
  title           = {\pkg{wfspan}: Wait-free Dynamic Memory Management},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {43:1--43:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3533724},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3533724},
  abstract        = {Dynamic memory allocation plays a vital role in modern
                     application programs. Modern lock-free memory
                     allocators based on hardware atomic primitives usually
                     provide good performance. However, threads may starve
                     in these lock-free implementations, leading \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {43},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Choi:2022:ECA,
  author          = {Kyubaik Choi and Gerald E. Sobelman},
  title           = {An Efficient {CNN} Accelerator for Low-Cost Edge
                     Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {44:1--44:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3539224},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3539224},
  abstract        = {Customized hardware based convolutional neural network
                     (CNN or ConvNet) accelerators have attracted
                     significant attention for applications in a low-cost,
                     edge computing system. However, there is a lack of
                     research that seeks to optimize at both the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {44},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
%%% NOTE: the sixth author's surname was spelled "Chakrabory" in this entry;
%%% it is corrected below to "Chakraborty".  Verify against the published
%%% article if the spelling is ever questioned.
@Article{Zhao:2022:CBI,
author = "Qingling Zhao and Mingqiang Chen and Zonghua Gu and
Siyu Luan and Haibo Zeng and Samarjit Chakraborty",
title = "{CAN} Bus Intrusion Detection Based on Auxiliary
Classifier {GAN} and Out-of-distribution Detection",
journal = j-TECS,
volume = "21",
number = "4",
pages = "45:1--45:??",
month = jul,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3540198",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Oct 29 08:11:12 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3540198",
abstract = "The Controller Area Network (CAN) is a ubiquitous bus
protocol present in the Electrical/Electronic (E/E)
systems of almost all vehicles. It is vulnerable to a
range of attacks once the attacker gains access to the
bus through the vehicle's attack \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@article{Daghero:2022:HAR,
  author          = {Francesco Daghero and Alessio Burrello and Chen Xie
                     and Marco Castellano and Luca Gandolfi and Andrea
                     Calimera and Enrico Macii and Massimo Poncino and
                     Daniele Jahier Pagliari},
  title           = {Human Activity Recognition on Microcontrollers with
                     Quantized and Adaptive Deep Neural Networks},
  journal         = j-TECS,
  volume          = {21},
  number          = {4},
  pages           = {46:1--46:??},
  month           = jul,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3542819},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Oct 29 08:11:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3542819},
  abstract        = {Human Activity Recognition (HAR) based on inertial
                     data is an increasingly diffused task on embedded
                     devices, from smartphones to ultra low-power sensors.
                     Due to the high computational complexity of deep
                     learning models, most embedded HAR systems are
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {46},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Shafique:2022:ISIa,
  author          = {Muhammad Shafique and Theocharis Theocharides and Hai
                     Li and Chun Jason Xue},
  title           = {Introduction to the Special Issue on Accelerating {AI}
                     on the Edge --- {Part 1}},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {47:1--47:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3558078},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3558078},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {47},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Mendez:2022:EIC,
  author          = {Javier Mendez and Kay Bierzynski and M. P. Cu{\'e}llar
                     and Diego P. Morales},
  title           = {Edge Intelligence: Concepts, Architectures,
                     Applications, and Future Directions},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {48:1--48:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3486674},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3486674},
  abstract        = {The name edge intelligence, also known as Edge AI, is
                     a recent term used in the past few years to refer to
                     the confluence of machine learning, or broadly speaking
                     artificial intelligence, with edge computing. In this
                     article, we revise the concepts \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {48},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Kang:2022:MLM,
  author          = {Chih-Kai Kang and Hashan Roshantha Mendis and Chun-Han
                     Lin and Ming-Syan Chen and Pi-Cheng Hsiu},
  title           = {More Is Less: Model Augmentation for Intermittent Deep
                     Inference},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {49:1--49:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3506732},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3506732},
  abstract        = {Energy harvesting creates an emerging intermittent
                     computing paradigm but poses new challenges for
                     sophisticated applications such as intermittent deep
                     neural network (DNN) inference. Although model
                     compression has adapted DNNs to resource-constrained
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {49},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Zhu:2022:TUO,
  author          = {Shien Zhu and Luan H. K. Duong and Weichen Liu},
  title           = {{TAB}: Unified and Optimized Ternary, Binary, and
                     Mixed-precision Neural Network Inference on the Edge},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {50:1--50:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3508390},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3508390},
  abstract        = {Ternary Neural Networks (TNNs) and mixed-precision
                     Ternary Binary Networks (TBNs) have demonstrated higher
                     accuracy compared to Binary Neural Networks (BNNs)
                     while providing fast, low-power, and memory-efficient
                     inference. Related works have improved the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {50},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Jeong:2022:TBF,
  author          = {Eunjin Jeong and Jangryul Kim and Soonhoi Ha},
  title           = {{TensorRT}-Based Framework and Optimization
                     Methodology for Deep Learning Inference on {Jetson}
                     Boards},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {51:1--51:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3508391},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3508391},
  abstract        = {As deep learning inference applications are increasing
                     in embedded devices, an embedded device tends to equip
                     neural processing units (NPUs) in addition to a
                     multi-core CPU and a GPU. NVIDIA Jetson AGX Xavier is
                     an example. For fast and efficient \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {51},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Kundu:2022:TAA,
  author          = {Souvik Kundu and Yao Fu and Bill Ye and Peter A.
                     Beerel and Massoud Pedram},
  title           = {Toward Adversary-aware Non-iterative Model Pruning
                     through Dynamic Network Rewiring of {DNNs}},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {52:1--52:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3510833},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3510833},
  abstract        = {We present a dynamic network rewiring (DNR) method to
                     generate pruned deep neural network (DNN) models that
                     both are robust against adversarially generated images
                     and maintain high accuracy on clean images. In
                     particular, the disclosed DNR training method
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {52},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Paissan:2022:PSB,
  author          = {Francesco Paissan and Alberto Ancilotto and Elisabetta
                     Farella},
  title           = {{PhiNets}: a Scalable Backbone for Low-power {AI} at
                     the Edge},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {53:1--53:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3510832},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3510832},
  abstract        = {In the Internet of Things era, where we see many
                     interconnected and heterogeneous mobile and fixed smart
                     devices, distributing the intelligence from the cloud
                     to the edge has become a necessity. Due to limited
                     computational and communication capabilities,
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {53},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Gomez:2022:DDP,
  author          = {Andres Gomez and Andreas Tretter and Pascal Alexander
                     Hager and Praveenth Sanmugarajah and Luca Benini and
                     Lothar Thiele},
  title           = {Dataflow Driven Partitioning of Machine Learning
                     Applications for Optimal Energy Use in Batteryless
                     Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {54:1--54:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520135},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520135},
  abstract        = {Sensing systems powered by energy harvesting have
                     traditionally been designed to tolerate long periods
                     without energy. As the Internet of Things (IoT) evolves
                     toward a more transient and opportunistic execution
                     paradigm, reducing energy storage costs will \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {54},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Kutukcu:2022:CGA,
  author          = {Basar Kutukcu and Sabur Baidya and Anand Raghunathan
                     and Sujit Dey},
  title           = {Contention Grading and Adaptive Model Selection for
                     Machine Vision in Embedded Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {55:1--55:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520134},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520134},
  abstract        = {Real-time machine vision applications running on
                     resource-constrained embedded systems face challenges
                     for maintaining performance. An especially challenging
                     scenario arises when multiple applications execute at
                     the same time, creating contention for the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {55},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Jokic:2022:CKE,
  author          = {Petar Jokic and Erfan Azarkhish and Andrea Bonetti and
                     Marc Pons and Stephane Emery and Luca Benini},
  title           = {A Construction Kit for Efficient Low Power Neural
                     Network Accelerator Designs},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {56:1--56:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520127},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520127},
  abstract        = {Implementing embedded neural network processing at the
                     edge requires efficient hardware acceleration that
                     combines high computational throughput with low power
                     consumption. Driven by the rapid evolution of network
                     architectures and their algorithmic \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {56},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Resch:2022:EER,
  author          = {Salonik Resch and S. Karen Khatamifard and Zamshed I.
                     Chowdhury and Masoud Zabihi and Zhengyang Zhao and
                     Husrev Cilasun and Jian-Ping Wang and Sachin S.
                     Sapatnekar and Ulya R. Karpuzcu},
  title           = {Energy-efficient and Reliable Inference in Nonvolatile
                     Memory under Extreme Operating Conditions},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {57:1--57:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520130},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520130},
  abstract        = {Beyond-edge devices can operate outside the reach of
                     the power grid and without batteries. Such devices can
                     be deployed in large numbers in regions that are
                     difficult to access. Using machine learning, these
                     devices can solve complex problems and relay \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {57},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Herzog:2022:RDE,
  author          = {Benedict Herzog and Stefan Reif and Judith Hemp and
                     Timo H{\"o}nig and Wolfgang Schr{\"o}der-Preikschat},
  title           = {Resource-demand Estimation for Edge Tensor Processing
                     Units},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {58:1--58:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520132},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520132},
  abstract        = {Machine learning has shown tremendous success in a
                     large variety of applications. The evolution of
                     machine-learning applications from cloud-based systems
                     to mobile and embedded devices has shifted the focus
                     from only quality-related aspects towards the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {58},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Hemmat:2022:CCA,
  author          = {Maedeh Hemmat and Joshua {San Miguel} and Azadeh
                     Davoodi},
  title           = {{CAP'NN}: a Class-aware Framework for Personalized
                     Neural Network Inference},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {59:1--59:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520126},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520126},
  abstract        = {We propose a framework for Class-aware Personalized
                     Neural Network Inference (CAP'NN), which prunes an
                     already-trained neural network model based on the
                     preferences of individual users. Specifically, by
                     adapting to the subset of output classes that each
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {59},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Park:2022:QST,
  author          = {Jun-Hyung Park and Kang-Min Kim and Sangkeun Lee},
  title           = {Quantized Sparse Training: a Unified Trainable
                     Framework for Joint Pruning and Quantization in
                     {DNNs}},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {60:1--60:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3524066},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3524066},
  abstract        = {Deep neural networks typically have extensive
                     parameters and computational operations. Pruning and
                     quantization techniques have been widely used to reduce
                     the complexity of deep models. Both techniques can be
                     jointly used for realizing significantly \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {60},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Baharani:2022:ARE,
  author          = {Mohammadreza Baharani and Hamed Tabkhi},
  title           = {{ATCN}: Resource-efficient Processing of Time Series
                     on Edge},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {61:1--61:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3524070},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3524070},
  abstract        = {This article presents a scalable deep learning model
                     called Agile Temporal Convolutional Network (ATCN) for
                     highly accurate fast classification and time series
                     prediction in resource-constrained embedded systems.
                     ATCN is a family of compact networks with \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {61},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Goyal:2022:HFU,
  author          = {Vidushi Goyal and Reetuparna Das and Valeria
                     Bertacco},
  title           = {Hardware-friendly User-specific Machine Learning for
                     Edge Devices},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {62:1--62:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3524125},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3524125},
  abstract        = {Machine learning (ML) on resource-constrained edge
                     devices is expensive and often requires offloading
                     computation to the cloud, which may compromise the
                     privacy of user data. In contrast, the type of data
                     processed at edge devices is user-specific and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {62},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{George:2022:UPE,
  author          = {Biji George and Om Ji Omer and Ziaul Choudhury and
                     {Anoop V} and Sreenivas Subramoney},
  title           = {A Unified Programmable Edge Matrix Processor for Deep
                     Neural Networks and Matrix Algebra},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {63:1--63:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3524453},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3524453},
  abstract        = {Matrix Algebra and Deep Neural Networks represent
                     foundational classes of computational algorithms across
                     multiple emerging applications like Augmented Reality
                     or Virtual Reality, autonomous navigation (cars,
                     drones, robots), data science, and various \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {63},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@article{Bouzidi:2022:PMC,
  author          = {Halima Bouzidi and Hamza Ouarnoughi and Smail Niar and
                     Abdessamad {Ait El Cadi}},
  title           = {Performance Modeling of Computer Vision-based {CNN} on
                     Edge {GPUs}},
  journal         = j-TECS,
  volume          = {21},
  number          = {5},
  pages           = {64:1--64:??},
  month           = sep,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3527169},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:21 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3527169},
  abstract        = {Convolutional Neural Networks (CNNs) are currently
                     widely used in various fields, particularly for
                     computer vision applications. Edge platforms have drawn
                     tremendous attention from academia and industry due to
                     their ability to improve execution time and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {64},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}
@Article{Yuan:2022:MFC,
  author = {Geng Yuan and Peiyan Dong and Mengshu Sun and Wei Niu
    and Zhengang Li and Yuxuan Cai and Yanyu Li and Jun Liu
    and Weiwen Jiang and Xue Lin and Bin Ren and Xulong
    Tang and Yanzhi Wang},
  title = {Mobile or {FPGA}? {A} Comprehensive Evaluation on
    Energy Efficiency and a Unified Optimization
    Framework},
  journal = j-TECS,
  volume = {21},
  number = {5},
  pages = {65:1--65:??},
  month = sep,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3528578},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:21 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3528578},
  abstract = {Efficient deployment of Deep Neural Networks (DNNs) on
    edge devices (i.e., FPGAs and mobile platforms) is very
    challenging, especially under a recent witness of the
    increasing DNN model size and complexity. Model
    compression strategies, including weight \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {65},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ghasemi:2022:EEE,
  author = {Mehdi Ghasemi and Daler Rakhmatov and Carole-Jean Wu
    and Sarma Vrudhula},
  title = {{EdgeWise}: Energy-efficient {CNN} Computation on Edge
    Devices under Stochastic Communication Delays},
  journal = j-TECS,
  volume = {21},
  number = {5},
  pages = {66:1--66:??},
  month = sep,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3530908},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:21 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3530908},
  abstract = {This article presents a framework to enable the
    energy-efficient execution of convolutional neural
    networks (CNNs) on edge devices. The framework consists
    of a pair of edge devices connected via a wireless
    network: a performance and energy-constrained
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {66},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Shafique:2022:ISIb,
  author = {Muhammad Shafique and Theocharis Theocharides and Hai
    (Helen) Li and Chun Jason Xue},
  title = {Introduction to the Special Issue on Accelerating {AI}
    on the Edge --- {Part 2}},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {67:1--67:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3563127},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3563127},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {67},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Chen:2022:ERD,
  author = {Kuan-Hsun Chen and Chiahui Su and Christian Hakert and
    Sebastian Buschj{\"a}ger and Chao-Lin Lee and Jenq-Kuen
    Lee and Katharina Morik and Jian-Jia Chen},
  title = {Efficient Realization of Decision Trees for Real-Time
    Inference},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {68:1--68:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3508019},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3508019},
  abstract = {For timing-sensitive edge applications, the demand for
    efficient lightweight machine learning solutions has
    increased recently. Tree ensembles are among the
    state-of-the-art in many machine learning applications.
    While single decision trees are comparably \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {68},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Pan:2022:BWH,
  author = {Hongyi Pan and Diaa Badawi and Ahmet Enis Cetin},
  title = {Block {Walsh-Hadamard} Transform-based Binary Layers
    in Deep Neural Networks},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {69:1--69:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3510026},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3510026},
  abstract = {Convolution has been the core operation of modern deep
    neural networks. It is well known that convolutions can
    be implemented in the Fourier Transform domain. In this
    article, we propose to use binary block Walsh-Hadamard
    transform (WHT) instead of the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {69},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Mukherjee:2022:AFD,
  author = {Arijit Mukherjee and Jayeeta Mondal and Swarnava Dey},
  title = {Accelerated Fire Detection and Localization at Edge},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {70:1--70:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3510027},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3510027},
  abstract = {Fire-related incidents continue to be reported as a
    leading cause of life and property destruction.
    Automated fire detection and localization (AFDL)
    systems have grown in importance with the evolution of
    applied robotics, especially because use of robots
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {70},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Almeida:2022:DDO,
  author = {Mario Almeida and Stefanos Laskaridis and Stylianos I.
    Venieris and Ilias Leontiadis and Nicholas D. Lane},
  title = {{DynO}: Dynamic Onloading of Deep Neural Networks from
    Cloud to Device},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {71:1--71:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3510831},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3510831},
  abstract = {Recently, there has been an explosive growth of mobile
    and embedded applications using convolutional neural
    networks (CNNs). To alleviate their excessive
    computational demands, developers have traditionally
    resorted to cloud offloading, inducing high \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {71},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ganesan:2022:DST,
  author = {Vinod Ganesan and Pratyush Kumar},
  title = {Design and Scaffolded Training of an Efficient {DNN}
    Operator for Computer Vision on the Edge},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {72:1--72:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3511212},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3511212},
  abstract = {Massively parallel systolic arrays and
    resource-efficient depthwise separable convolutions are
    two promising hardware and software techniques to
    accelerate DNN inference on the edge. Interestingly,
    their combination is inefficient: Computational
    patterns \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {72},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Shahhosseini:2022:OLO,
  author = {Sina Shahhosseini and Dongjoo Seo and Anil Kanduri and
    Tianyi Hu and Sung-Soo Lim and Bryan Donyanavard and
    Amir M. Rahmani and Nikil Dutt},
  title = {Online Learning for Orchestration of Inference in
    Multi-user End-edge-cloud Networks},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {73:1--73:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3520129},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3520129},
  abstract = {Deep-learning-based intelligent services have become
    prevalent in cyber-physical applications, including
    smart cities and health-care. Deploying
    deep-learning-based intelligence near the end-user
    enhances privacy protection, responsiveness, and
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {73},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Tsouvalas:2022:FST,
  author = {Vasileios Tsouvalas and Aaqib Saeed and Tanir
    Ozcelebi},
  title = {Federated Self-training for Semi-supervised Audio
    Recognition},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {74:1--74:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3520128},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3520128},
  abstract = {Federated Learning is a distributed machine learning
    paradigm dealing with decentralized and personal
    datasets. Since data reside on devices such as
    smartphones and virtual assistants, labeling is
    entrusted to the clients or labels are extracted in an
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {74},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Lemaire:2022:SAH,
  author = {Edgar Lemaire and Beno{\^\i}t Miramond and
    S{\'e}bastien Bilavarn and Hadi Saoud and Nassim
    Abderrahmane},
  title = {Synaptic Activity and Hardware Footprint of Spiking
    Neural Networks in Digital Neuromorphic Systems},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {75:1--75:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3520133},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3520133},
  abstract = {Spiking neural networks are expected to bring high
    resources, power, and energy efficiency to machine
    learning hardware implementations. In this regard, they
    could facilitate the integration of Artificial
    Intelligence in highly constrained embedded \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {75},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Yang:2022:DDC,
  author = {Yi Yang and Murugan Sankaradas and Srimat Chakradhar},
  title = {{DyCo}: Dynamic, Contextualized {AI} Models},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {76:1--76:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3520131},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3520131},
  abstract = {Devices with limited computing resources use smaller
    AI models to achieve low-latency inferencing. However,
    model accuracy is typically much lower than the
    accuracy of a bigger model that is trained and deployed
    in places where the computing resources are \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {76},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Song:2022:DTC,
  author = {Shihao Song and Adarsha Balaji and Anup Das and
    Nagarajan Kandasamy},
  title = {Design-Technology Co-Optimization for {NVM-Based}
    Neuromorphic Processing Elements},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {77:1--77:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3524068},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3524068},
  abstract = {An emerging use case of machine learning (ML) is to
    train a model on a high-performance system and deploy
    the trained model on energy-constrained embedded
    systems. Neuromorphic hardware platforms, which operate
    on principles of the biological brain, can \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {77},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Morris:2022:HUH,
  author = "Justin Morris and Kazim Ergun and Behnam Khaleghi and
    Mohsen Imani and Baris Aksanli and Tajana Simunic",
  title = "{HyDREA}: Utilizing Hyperdimensional Computing for a
    More Robust and Efficient Machine Learning System",
  journal = j-TECS,
  volume = "21",
  number = "6",
  pages = "78:1--78:??",
  month = nov,
  year = "2022",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3524067",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Sat Mar 11 08:39:23 MST 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/doi/10.1145/3524067",
  abstract = "Today's systems rely on sending all the data to the
    cloud and then using complex algorithms, such as Deep
    Neural Networks, which require billions of parameters
    and many hours to train a model. In contrast, the human
    brain can do much of this learning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "78",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Khan:2022:BIC,
  author = {Asif Ali Khan and S{\'e}bastien Ollivier and Stephen
    Longofono and Gerald Hempel and Jeronimo Castrillon and
    Alex K. Jones},
  title = {Brain-inspired Cognition in Next-generation Racetrack
    Memories},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {79:1--79:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3524071},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3524071},
  abstract = {Hyperdimensional computing (HDC) is an emerging
    computational framework inspired by the brain that
    operates on vectors with thousands of dimensions to
    emulate cognition. Unlike conventional computational
    frameworks that operate on numbers, HDC, like the
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {79},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Alam:2022:WCD,
  author = {Syed Asad Alam and Andrew Anderson and Barbara
    Barabasz and David Gregg},
  title = {{Winograd} Convolution for Deep Neural Networks:
    Efficient Point Selection},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {80:1--80:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3524069},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3524069},
  abstract = {Convolutional neural networks (CNNs) have dramatically
    improved the accuracy of image, video, and audio
    processing for tasks such as object recognition, image
    segmentation, and interactive speech systems. CNNs
    require large amounts of computing resources \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {80},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Hassantabar:2022:MMH,
  author = {Shayan Hassantabar and Joe Zhang and Hongxu Yin and
    Niraj K. Jha},
  title = {{MHDeep}: Mental Health Disorder Detection System
    Based on Wearable Sensors and Artificial Neural
    Networks},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {81:1--81:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3527170},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3527170},
  abstract = {Mental health problems impact the quality of life of
    millions of people around the world. However, diagnosis
    of mental health disorders is a challenging problem
    that often relies on self-reporting by patients about
    their behavioral patterns and social \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {81},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{HeydariGorji:2022:LCS,
  author = {Ali HeydariGorji and Siavash Rezaei and Mahdi
    Torabzadehkashi and Hossein Bobarshad and Vladimir
    Alves and Pai H. Chou},
  title = {Leveraging Computational Storage for Power-Efficient
    Distributed Data Analytics},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {82:1--82:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3528577},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3528577},
  abstract = {This article presents a family of computational
    storage drives (CSDs) and demonstrates their
    performance and power improvements due to in-storage
    processing (ISP) when running big data analytics
    applications. CSDs are an emerging class of solid state
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {82},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Li:2022:FDN,
  author = "Shuwei Li and Changhai Man and Ao Shen and Ziyi Guan
    and Wei Mao and Shaobo Luo and Rumin Zhang and Hao Yu",
  title = "A Fall Detection Network by {$2$D\slash $3$D}
    Spatio-temporal Joint Models with Tensor Compression on
    Edge",
  journal = j-TECS,
  volume = "21",
  number = "6",
  pages = "83:1--83:??",
  month = nov,
  year = "2022",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3531004",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Sat Mar 11 08:39:23 MST 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/doi/10.1145/3531004",
  abstract = "Falling is ranked highly among the threats in elderly
    healthcare, which promotes the development of automatic
    fall detection systems with extensive concern. With the
    fast development of the Internet of Things (IoT) and
    Artificial Intelligence (AI), camera \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "83",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Soliman:2022:FFF,
  author = {Taha Soliman and Nellie Laleni and Tobias Kirchner and
    Franz M{\"u}ller and Ashish Shrivastava and Thomas
    K{\"a}mpfe and Andre Guntoro and Norbert Wehn},
  title = {{FELIX}: a Ferroelectric {FET} Based Low Power
    Mixed-Signal In-Memory Architecture for {DNN}
    Acceleration},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {84:1--84:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3529760},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3529760},
  abstract = {Today, a large number of applications depend on deep
    neural networks (DNN) to process data and perform
    complicated tasks at restricted power and latency
    specifications. Therefore, processing-in-memory (PIM)
    platforms are actively explored as a promising
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {84},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Leite:2022:REC,
  author = {Clayton Frederick Souza Leite and Yu Xiao},
  title = {Resource-Efficient Continual Learning for Sensor-Based
    Human Activity Recognition},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {85:1--85:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3530910},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3530910},
  abstract = {Recent advances in deep learning have granted
    unrivaled performance to sensor-based human activity
    recognition (HAR). However, in a real-world scenario,
    the HAR solution is subject to diverse changes over
    time such as the need to learn new activity \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {85},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Pal:2022:OEI,
  author = {Subhankar Pal and Swagath Venkataramani and Viji
    Srinivasan and Kailash Gopalakrishnan},
  title = {{OnSRAM}: Efficient Inter-Node On-Chip Scratchpad
    Management in Deep Learning Accelerators},
  journal = j-TECS,
  volume = {21},
  number = {6},
  pages = {86:1--86:??},
  month = nov,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3530909},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:23 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3530909},
  abstract = {Hardware acceleration of Artificial Intelligence (AI)
    workloads has gained widespread popularity with its
    potential to deliver unprecedented performance and
    efficiency. An important challenge remains in how AI
    accelerators are programmed to sustain high \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {86},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Cai:2023:OOF,
  author = {Xuyi Cai and Ying Wang and Lei Zhang},
  title = {{Optimus}: an Operator Fusion Framework for Deep
    Neural Networks},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {1:1--1:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3520142},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3520142},
  abstract = {The reduction of neural parameters and operations for
    the applications on embedded and IoT platforms in
    current deep neural network (DNN) architectures has
    received increasing attention. Relatively, the
    intermediate feature maps of such lightweight neural
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {1},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Oh:2023:MFL,
  author = {Deok-Jae Oh and Yaebin Moon and Do Kyu Ham and Tae Jun
    Ham and Yongjun Park and Jae W. Lee and Jung Ho Ahn and
    Eojin Lee},
  title = {{MaPHeA}: a Framework for Lightweight Memory
    Hierarchy-aware Profile-guided Heap Allocation},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {2:1--2:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3527853},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3527853},
  abstract = {Hardware performance monitoring units (PMUs) are a
    standard feature in modern microprocessors, providing a
    rich set of microarchitectural event samplers.
    Recently, numerous profile-guided optimization (PGO)
    frameworks have exploited them to feature much
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {2},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Monniaux:2023:FVL,
  author = {David Monniaux and Cyril Six},
  title = {Formally Verified Loop-Invariant Code Motion and
    Assorted Optimizations},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {3:1--3:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3529507},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3529507},
  abstract = {We present an approach for implementing a formally
    certified loop-invariant code motion optimization by
    composing an unrolling pass and a formally certified
    yet efficient global subexpression elimination. This
    approach is lightweight: each pass comes with
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {3},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Wen:2023:WCP,
  author = {Elliott Wen and Gerald Weber and Suranga Nanayakkara},
  title = {{WasmAndroid}: a Cross-Platform Runtime for Native
    Programming Languages on {Android}},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {4:1--4:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3530286},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3530286},
  abstract = {Open source hardware such as RISC-V has been gaining
    substantial momentum. Recently, they have begun to
    embrace Google's Android operating system to leverage
    its software ecosystem. Despite the encouraging
    progress, a challenging issue arises: a majority
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {4},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Chen:2023:FNN,
  author = {Weiwei Chen and Ying Wang and Ying Xu and Chengsi Gao
    and Cheng Liu and Lei Zhang},
  title = {A Framework for Neural Network Architecture and
    Compile Co-optimization},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {5:1--5:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3533251},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3533251},
  abstract = {The efficiency of deep neural network (DNN) solutions
    on real hardware devices are mainly decided by the DNN
    architecture and the compiler-level scheduling strategy
    on the hardware. When we try to fully exploit the
    underlying hardware and obtain the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {5},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Young:2023:CAD,
  author = "May Young and Alan J. Hu and Guy G. F. Lemieux",
  title = "Cache Abstraction for Data Race Detection in
    Heterogeneous Systems with Non-coherent Accelerators",
  journal = j-TECS,
  volume = "22",
  number = "1",
  pages = "6:1--6:??",
  month = jan,
  year = "2023",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3535457",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Sat Mar 11 08:39:25 MST 2023",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/doi/10.1145/3535457",
  abstract = "Embedded systems are becoming increasingly complex and
    heterogeneous, featuring multiple processor cores
    (which might themselves be heterogeneous) as well as
    specialized hardware accelerators, all accessing shared
    memory. Many accelerators are non-coherent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "6",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Brilli:2023:ECM,
  author = {Gianluca Brilli and Roberto Cavicchioli and Marco
    Solieri and Paolo Valente and Andrea Marongiu},
  title = {Evaluating Controlled Memory Request Injection for
    Efficient Bandwidth Utilization and Predictable
    Execution in Heterogeneous {SoCs}},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {7:1--7:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3548773},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3548773},
  abstract = {High-performance embedded platforms are increasingly
    adopting heterogeneous systems-on-chip (HeSoC) that
    couple multi-core CPUs with accelerators such as GPU,
    FPGA, or AI engines. Adopting HeSoCs in the context of
    real-time workloads is not immediately \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {7},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Das:2023:EFS,
  author = {Satyajit Das and Kevin Martin and Thomas Peyret and
    Philippe Coussy},
  title = {An Efficient and Flexible Stochastic {CGRA} Mapping
    Approach},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {8:1--8:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3550071},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3550071},
  abstract = {Coarse-Grained Reconfigurable Array (CGRA)
    architectures are promising high-performance and
    power-efficient platforms. However, mapping
    applications efficiently on CGRA is a challenging task.
    This is known to be an NP complete problem. Hence,
    finding good \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {8},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Saberi:2023:POT,
author = "Iman Saberi and Fathiyeh Faghih and Farzad Sobhi
Bavil",
title = "A Passive Online Technique for Learning Hybrid
Automata from {Input\slash Output} Traces",
journal = j-TECS,
volume = "22",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3556543",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 11 08:39:25 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3556543",
abstract = "Specification synthesis is the process of deriving a
model from the input-output traces of a system. It is
used extensively in test design, reverse engineering,
and system identification. One type of the resulting
artifact of this process for cyber-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cleaveland:2023:FVN,
  author = {Rachel Cleaveland and Stefan Mitsch and Andr{\'e} Platzer},
  title = {Formally Verified Next-generation Airborne Collision Avoidance
    Games in {ACAS X}},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {10:1--10:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3544970},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3544970},
  abstract = {The design of aircraft collision avoidance algorithms is a
    subtle but important challenge that merits the need for provable safety
    guarantees. Obtaining such guarantees is nontrivial given the
    unpredictability of the interplay of the intruder aircraft \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {10},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Mondal:2023:HTD,
  author = {Anindan Mondal and Shubrojyoti Karmakar and Mahabub Hasan
    Mahalat and Suchismita Roy and Bibhash Sen and Anupam Chattopadhyay},
  title = {Hardware {Trojan} Detection using Transition Probability with
    Minimal Test Vectors},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {11:1--11:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3545000},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3545000},
  abstract = {Hardware Trojans (HTs) are malicious manipulations of the
    standard functionality of an integrated circuit (IC). Sophisticated
    defense against HT attacks has become the utmost current research
    endeavor. In particular, the HTs whose operations depend on the
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {11},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Fradet:2023:RRD,
  author = {Pascal Fradet and Alain Girault and Ruby Krishnaswamy and Xavier
    Nicollin and Arash Shafiei},
  title = {{RDF}: a Reconfigurable Dataflow Model of Computation},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {12:1--12:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3544972},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3544972},
  abstract = {Dataflow Models of Computation (MoCs) are widely used in
    embedded systems, including multimedia processing, digital signal
    processing, telecommunications, and automatic control. In a dataflow
    MoC, an application is specified as a graph of actors connected
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {12},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Rodionova:2023:TRT,
author = "Alena Rodionova and Lars Lindemann and Manfred Morari
and George Pappas",
title = "Temporal Robustness of Temporal Logic Specifications:
Analysis and Control Design",
journal = j-TECS,
volume = "22",
number = "1",
pages = "13:1--13:??",
month = jan,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3550072",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 11 08:39:25 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3550072",
abstract = "We study the temporal robustness of temporal logic
specifications and show how to design temporally robust
control laws for time-critical control systems. This
topic is of particular interest in connected systems
and interleaving processes such as multi-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xu:2023:LVL,
  author = {Zirui Xu and Fuxun Yu and Chenchen Liu and Xiang Chen},
  title = {{LanCeX}: a Versatile and Lightweight Defense Method against
    Condensed Adversarial Attacks in Image and Audio Recognition},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {14:1--14:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3555375},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3555375},
  abstract = {Convolutional Neural Networks (CNNs) are widely deployed in
    various embedded recognition applications. However, they demonstrate a
    considerable vulnerability to adversarial attacks, which leverage the
    well-designed perturbations to mislead the recognition \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {14},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Huang:2023:DEL,
author = "Wenbo Huang and Lei Zhang and Shuoyuan Wang and Hao Wu
and Aiguo Song",
title = "Deep Ensemble Learning for Human Activity Recognition
Using Wearable Sensors via Filter Activation",
journal = j-TECS,
volume = "22",
number = "1",
pages = "15:1--15:??",
month = jan,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3551486",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 11 08:39:25 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3551486",
abstract = "During the past decade, human activity recognition
(HAR) using wearable sensors has become a new research
hot spot due to its extensive use in various
application domains such as healthcare, fitness, smart
homes, and eldercare. Deep neural networks, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hessien:2023:PPS,
  author = {Salah Hessien and Mohamed Hassan},
  title = {{PISCOT}: a Pipelined Split-Transaction {COTS-Coherent} Bus for
    Multi-Core Real-Time Systems},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {16:1--16:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3556975},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3556975},
  abstract = {Tasks in modern embedded systems such as automotive and
    avionics communicate among each other using shared data towards
    achieving the desired functionality of the whole system. In commodity
    platforms, cores communicate data through the shared memory \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {16},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Yeh:2023:WRR,
  author = {Po-Chen Yeh and Chin-Hsien Wu and Yung-Hsiang Lin and Ming-Yan
    Wu},
  title = {A Write-Related and Read-Related {DRAM} Allocation Strategy
    Inside Solid-State Drives {(SSDs)}},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {17:1--17:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3561301},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3561301},
  abstract = {Although NAND flash memory has the advantages of small size,
    low-power consumption, shock resistance, and fast access speed, NAND
    flash memory still faces the problems of ``out-of-place updates,''
    ``garbage collection,'' and ``unbalanced execution time'' due to
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {17},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ali:2023:ESE,
author = "Ali J. {Ben Ali} and Marziye Kouroshli and Sofiya
Semenova and Zakieh Sadat Hashemifar and Steven Y. Ko
and Karthik Dantu",
title = "{Edge-SLAM}: Edge-Assisted Visual Simultaneous
Localization and Mapping",
journal = j-TECS,
volume = "22",
number = "1",
pages = "18:1--18:??",
month = jan,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3561972",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 11 08:39:25 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3561972",
abstract = "Localization in urban environments is becoming
increasingly important and used in tools such as ARCore
[18], ARKit [34] and others. One popular mechanism
to achieve accurate indoor localization and a map of
the space is using Visual Simultaneous \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Koh:2023:PST,
  author = {Jaime Koh and Bruno Bodin},
  title = {{$K$}-Periodic Scheduling for Throughput-Buffering Trade-Off
    Exploration of {CSDF}},
  journal = j-TECS,
  volume = {22},
  number = {1},
  pages = {19:1--19:??},
  month = jan,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3559760},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:25 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3559760},
  abstract = {The design of time-critical embedded systems often requires
    static models of computation such as cyclo-static dataflow. These models
    enable performance guarantees, execution correctness, and optimized
    memory usage. Nonetheless, determining optimal buffer \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {19},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ernst:2023:ACN,
  author = {Rolf Ernst and Dominik St{\"o}hrmann and Alex Bendrick and Adam
    Kostrzewa},
  title = {Application-centric Network Management --- Addressing Safety and
    Real-time in {V2X} Applications},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {20:1--20:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3528411},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3528411},
  abstract = {The current roadmaps and surveys for future wireless
    networking typically focus on communication and networking technologies
    and use representative applications to derive future network
    requirements. Such a benchmarking approach, however, does not cover
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {20},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Pujol:2023:VEC,
  author = {Roger Pujol and Josep Jorba and Hamid Tabani and Leonidas
    Kosmidis and Enrico Mezzetti and Jaume Abella and Francisco Cazorla},
  title = {Vector Extensions in {COTS} Processors to Increase Guaranteed
    Performance in Real-Time Systems},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {21:1--21:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3561054},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3561054},
  abstract = {The need for increased application performance in
    high-integrity systems such as those in avionics is on the rise as
    software continues to implement more complex functionalities. The
    prevalent computing solution for future high-integrity embedded products
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {21},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Sarwar:2023:CPE,
  author = {Mir Sarwar and Rajarshi Ray and Ansuman Banerjee},
  title = {A Contrastive Plan Explanation Framework for Hybrid System
    Models},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {22:1--22:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3561532},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3561532},
  abstract = {In artificial intelligence planning, having an explanation of
    a plan given by a planner is often desirable. The ability to explain
    various aspects of a synthesized plan to an end user not only brings in
    trust on the planner but also reveals insights of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {22},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Erata:2023:EEA,
  author = {Ferhat Erata and Eren Yildiz and Arda Goknil and Kasim Sinan
    Yildirim and Jakub Szefer and Ruzica Piskac and Gokcin Sezgin},
  title = {{ETAP}: Energy-aware Timing Analysis of Intermittent Programs},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {23:1--23:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3563216},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3563216},
  abstract = {Energy harvesting battery-free embedded devices rely only on
    ambient energy harvesting that enables stand-alone and sustainable IoT
    applications. These devices execute programs when the harvested ambient
    energy in their energy reservoir is sufficient to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {23},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Gkeka:2023:RSC,
  author = {Maria Rafaela Gkeka and Alexandros Patras and Nikolaos
    Tavoularis and Stylianos Piperakis and Emmanouil Hourdakis and Panos
    Trahanias and Christos D. Antonopoulos and Spyros Lalis and Nikolaos
    Bellas},
  title = {Reconfigurable System-on-Chip Architectures for Robust Visual
    {SLAM} on Humanoid Robots},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {24:1--24:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3570210},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3570210},
  abstract = {Visual Simultaneous Localization and Mapping (vSLAM) is the
    method of employing an optical sensor to map the robot's observable
    surroundings while also identifying the robot's pose in relation to that
    map. The accuracy and speed of vSLAM calculations can \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {24},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Hu:2023:HPI,
  author = {Xinyi Hu and Debiao He and Min Luo and Cong Peng and Qi Feng and
    Xinyi Huang},
  title = {High-Performance Implementation of the Identity-Based Signature
    Scheme in {IEEE P1363} on {GPU}},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {25:1--25:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3564784},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3564784},
  abstract = {Identity-based cryptography is proposed to solve the
    complicated certificate management of traditional public-key
    cryptography. The pairing computation and high-level tower extension
    field arithmetic turn out to be the performance bottleneck of
    pairing-\ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {25},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Kundu:2023:SRB,
  author = {Atanu Kundu and Sarthak Das and Rajarshi Ray},
  title = {{SAT-Reach}: a Bounded Model Checker for Affine Hybrid Systems},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {26:1--26:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3567425},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3567425},
  abstract = {Bounded model checking (BMC) is well-known to be undecidable
    even for simple hybrid systems. Existing work targeted for a wide class
    of non-linear hybrid systems reduces the BMC problem to the
    satisfiability problem of an satisfiability modulo theory \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {26},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ogras:2023:ISI,
  author = {Umit Y. Ogras and Radu Marculescu and Trevor N. Mudge and
    Michael Kishinevsky},
  title = {Introduction to the Special Issue on Domain-Specific
    System-on-Chip Architectures and Run-Time Management Techniques},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {27:1--27:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3567834},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3567834},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {27},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Krishnakumar:2023:DSA,
  author = {Anish Krishnakumar and Umit Ogras and Radu Marculescu and Mike
    Kishinevsky and Trevor Mudge},
  title = {Domain-Specific Architectures: Research Problems and Promising
    Approaches},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {28:1--28:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3563946},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3563946},
  abstract = {Process technology-driven performance and energy efficiency
    improvements have slowed down as we approach physical design limits.
    General-purpose manycore architectures attempt to circumvent this
    challenge, but they have a significant performance and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {28},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Li:2023:EDS,
  author = {Yueting Li and Wang Kang and Kunyu Zhou and Keni Qiu and
    Weisheng Zhao},
  title = {Experimental Demonstration of {STT-MRAM}-based Nonvolatile
    Instantly On\slash Off System for {IoT} Applications: Case Studies},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {29:1--29:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3546193},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3546193},
  abstract = {Energy consumption has been a big challenge for electronic
    devices, particularly for battery-powered Internet of Things (IoT)
    equipment. To address such a challenge, on the one hand, low-power
    electronic design methodologies and novel power management \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {29},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Aminabadi:2023:SAE,
author = "Reza Yazdani Aminabadi and Olatunji Ruwase and Minjia
Zhang and Yuxiong He and Jose-Maria Arnau and Antonio
Gonzalez",
title = "{SHARP}: an Adaptable, Energy-Efficient Accelerator
for Recurrent Neural Networks",
journal = j-TECS,
volume = "22",
number = "2",
pages = "30:1--30:??",
month = mar,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3552513",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 11 08:39:26 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3552513",
abstract = "The effectiveness of Recurrent Neural Networks (RNNs)
for tasks such as Automatic Speech Recognition has
fostered interest in RNN inference acceleration. Due to
the recurrent nature and data dependencies of RNN
computations, prior work has designed \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Boroujerdian:2023:FES,
  author = {Behzad Boroujerdian and Ying Jing and Devashree Tripathy and
    Amit Kumar and Lavanya Subramanian and Luke Yen and Vincent Lee and
    Vivek Venkatesan and Amit Jindal and Robert Shearer and Vijay Janapa
    Reddi},
  title = {{FARSI}: an Early-stage Design Space Exploration Framework to
    Tame the Domain-specific System-on-chip Complexity},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {31:1--31:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3544016},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3544016},
  abstract = {Domain-specific SoCs (DSSoCs) are an attractive solution for
    domains with extremely stringent power, performance, and area
    constraints. However, DSSoCs suffer from two fundamental complexities.
    On the one hand, their many specialized hardware blocks \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {31},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Brumar:2023:EDA,
  author = {Iulian Brumar and Georgios Zacharopoulos and Yuan Yao and Saketh
    Rama and David Brooks and Gu-Yeon Wei},
  title = {Early {DSE} and Automatic Generation of Coarse-grained Merged
    Accelerators},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {32:1--32:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3546070},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3546070},
  abstract = {Post-Moore's law area-constrained systems rely on accelerators
    to deliver performance enhancements. Coarse-grained accelerators can
    offer substantial domain acceleration, but manual, ad hoc identification
    of code to accelerate is prohibitively expensive. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {32},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Inci:2023:QFQ,
  author = {Ahmet Inci and Siri Virupaksha and Aman Jain and Ting-Wu Chin
    and Venkata Thallam and Ruizhou Ding and Diana Marculescu},
  title = {{QUIDAM}: a Framework for Quantization-aware {DNN} Accelerator
    and Model Co-Exploration},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {33:1--33:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3555807},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3555807},
  abstract = {As the machine learning and systems communities strive to
    achieve higher energy efficiency through custom deep neural network
    (DNN) accelerators, varied precision or quantization levels, and model
    compression techniques, there is a need for design space \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {33},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Ahangari:2023:HBH,
  author = {Hamzeh Ahangari and Muhammet Mustafa {\"O}zdal and {\"O}zcan
    {\"O}zt{\"u}rk},
  title = {{HLS}-based High-throughput and Work-efficient Synthesizable
    Graph Processing Template Pipeline},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {34:1--34:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3529256},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3529256},
  abstract = {Hardware systems composed of diverse execution resources are
    being deployed to cope with the complexity and performance requirements
    of Artificial Intelligence (AI) and Machine Learning (ML) applications.
    With the emergence of new hardware platforms, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {34},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Koul:2023:AAA,
  author = {Kalhan Koul and Jackson Melchert and Kavya Sreedhar and Leonard
    Truong and Gedeon Nyengele and Keyi Zhang and Qiaoyi Liu and Jeff Setter
    and Po-Han Chen and Yuchen Mei and Maxwell Strange and Ross Daly and
    Caleb Donovick and Alex Carsello and Taeyoung Kong and Kathleen Feng and
    Dillon Huff and Ankita Nayak and Rajsekhar Setaluri and James Thomas and
    Nikhil Bhagdikar and David Durst and Zachary Myers and Nestan
    Tsiskaridze and Stephen Richardson and Rick Bahr and Kayvon Fatahalian
    and Pat Hanrahan and Clark Barrett and Mark Horowitz and Christopher
    Torng and Fredrik Kjolstad and Priyanka Raina},
  title = {{AHA}: an Agile Approach to the Design of Coarse-Grained
    Reconfigurable Accelerators and Compilers},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {35:1--35:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3534933},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3534933},
  abstract = {With the slowing of Moore's law, computer architects have
    turned to domain-specific hardware specialization to continue improving
    the performance and efficiency of computing systems. However,
    specialization typically entails significant modifications to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {35},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Mack:2023:CCI,
author = "Joshua Mack and Sahil Hassan and Nirmal Kumbhare and
Miguel Castro Gonzalez and Ali Akoglu",
title = "{CEDR}: a Compiler-integrated, Extensible {DSSoC}
Runtime",
journal = j-TECS,
volume = "22",
number = "2",
pages = "36:1--36:??",
month = mar,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3529257",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 11 08:39:26 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3529257",
abstract = "In this work, we present a Compiler-integrated,
Extensible Domain Specific System on Chip Runtime
(CEDR) ecosystem to facilitate research toward
addressing the challenges of architecture, system
software, and application development with distinct
plug-and-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2023:ARL,
  author = {Huili Chen and Xinqiao Zhang and Ke Huang and Farinaz
    Koushanfar},
  title = {{AdaTest}: Reinforcement Learning and Adaptive Sampling for
    On-chip Hardware {Trojan} Detection},
  journal = j-TECS,
  volume = {22},
  number = {2},
  pages = {37:1--37:??},
  month = mar,
  year = {2023},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3544015},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Mar 11 08:39:26 MST 2023},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/doi/10.1145/3544015},
  abstract = {This paper proposes AdaTest, a novel adaptive test pattern
    generation framework for efficient and reliable Hardware Trojan (HT)
    detection. HT is a backdoor attack that tampers with the design of
    victim integrated circuits (ICs). AdaTest improves the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {37},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}
@Article{Anderson:2023:VPM,
author = "Jeff Anderson and Engin Kayraklioglu and Hamid Reza
Imani and Chen Shen and Mario Miscuglio and Volker J.
Sorger and Tarek El-Ghazawi",
title = "Virtualizing a Post-{Moore}'s Law Analog Mesh
Processor: The Case of a Photonic {PDE} Accelerator",
journal = j-TECS,
volume = "22",
number = "2",
pages = "38:1--38:??",
month = mar,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3544971",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 11 08:39:26 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3544971",
abstract = "Innovative processor architectures aim to play a
critical role in future sustainment of performance
improvements under severe limitations imposed by the
end of Moore's Law. The Reconfigurable Optical Computer
(ROC) is one such innovative, Post-Moore's Law
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{S:2023:PQA,
author = "Aswathy N. S. and Arnab Sarkar and Hemangee Kapoor",
title = "A Predictable {QoS}-aware Memory Request Scheduler for
Soft Real-time Systems",
journal = j-TECS,
volume = "22",
number = "2",
pages = "39:1--39:??",
month = mar,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3561052",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Mar 11 08:39:26 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3561052",
abstract = "A memory controller manages the flow of data to and
from attached memory devices. The order in which a set
of contending memory requests from different tasks are
serviced significantly influences the rate of progress
and completion times of these tasks. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sifakis:2023:TAS,
author = "Joseph Sifakis and David Harel",
title = "Trustworthy Autonomous System Development",
journal = j-TECS,
volume = "22",
number = "3",
pages = "40:1--40:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3545178",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3545178",
abstract = "Autonomous systems emerge from the need to
progressively replace human operators by autonomous
agents in a wide variety of application areas. We offer
an analysis of the state of the art in developing
autonomous systems, focusing on design and validation
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shrivastava:2023:ATS,
author = "Aviral Shrivastava and Jian-Jia Chen and Akash Kumar
and Anup Das",
title = "{ACM TECS} Special Issue on Embedded System Security
Tutorials",
journal = j-TECS,
volume = "22",
number = "3",
pages = "41:1--41:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3594872",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3594872",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2023:TTR,
author = "Huili Chen and Farinaz Koushanfar",
title = "Tutorial: Toward Robust Deep Learning against
Poisoning Attacks",
journal = j-TECS,
volume = "22",
number = "3",
pages = "42:1--42:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3574159",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3574159",
abstract = "Deep Learning (DL) has been increasingly deployed in
various real-world applications due to its
unprecedented performance and automated capability of
learning hidden representations. While DL can achieve
high task performance, the training process of a DL
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Feldtkeller:2023:COS,
author = "Jakob Feldtkeller and Pascal Sasdrich and Tim
G{\"u}neysu",
title = "Challenges and Opportunities of Security-Aware {EDA}",
journal = j-TECS,
volume = "22",
number = "3",
pages = "43:1--43:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3576199",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3576199",
abstract = "The foundation of every digital system is based on
hardware in which security, as a core service of many
applications, should be deeply embedded. Unfortunately,
the knowledge of system security and efficient hardware
design is spread over different \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rautakoura:2023:DSH,
author = "Antti Rautakoura and Timo H{\"a}m{\"a}l{\"a}inen",
title = "Does {SoC} Hardware Development Become Agile by Saying
So: a Literature Review and Mapping Study",
journal = j-TECS,
volume = "22",
number = "3",
pages = "44:1--44:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3578554",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3578554",
abstract = "The success of agile development methods in software
development has raised interest in System-on-Chip (SoC)
design, which involves high architectural and
development process complexity under time and project
management pressure. This article discovers \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pearce:2023:HLA,
author = "Hammond Pearce and Ramesh Karri and Benjamin Tan",
title = "High-Level Approaches to Hardware Security: a
Tutorial",
journal = j-TECS,
volume = "22",
number = "3",
pages = "45:1--45:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3577200",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3577200",
abstract = "Designers use third-party intellectual property (IP)
cores and outsource various steps in the integrated
circuit (IC) design and manufacturing flow. As a
result, security vulnerabilities have been rising. This
is forcing IC designers and end users to re-evaluate
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gubbi:2023:HTD,
author = "Kevin Immanuel Gubbi and Banafsheh Saber Latibari and
Anirudh Srikanth and Tyler Sheaves and Sayed Arash
Beheshti-Shirazi and Sai Manoj PD and Satareh Rafatirad
and Avesta Sasan and Houman Homayoun and Soheil
Salehi",
title = "Hardware {Trojan} Detection Using Machine Learning: a
Tutorial",
journal = j-TECS,
volume = "22",
number = "3",
pages = "46:1--46:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3579823",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3579823",
abstract = "With the growth and globalization of IC design and
development, there is an increase in the number of
Designers and Design houses. As setting up a
fabrication facility may easily cost upwards of \$20
billion, costs for advanced nodes may be even greater.
IC design houses that cannot produce their chips
in-house have no option but to use external foundries
that are often in other countries. Establishing trust
with these external foundries can be a challenge, and
these foundries are assumed to be untrusted. The use of
these untrusted foundries in the global semiconductor
supply chain has raised concerns about the security of
the fabricated ICs targeted for sensitive applications.
One of these security threats is the adversarial
infestation of fabricated ICs with a Hardware Trojan
(HT). An HT can be broadly described as a malicious
modification to a circuit to control, modify, disable,
or monitor its logic. Conventional VLSI manufacturing
tests and verification methods fail to detect HT due to
the different and unmodeled nature of these malicious
modifications. Current state-of-the-art HT detection
methods utilize statistical analysis of various
side-channel information collected from ICs, such as
power analysis, power supply transient analysis,
regional supply current analysis, temperature analysis,
wireless transmission power analysis, and delay
analysis. To detect HTs, most methods require a
Trojan-free reference golden IC. A signature from these
golden ICs is extracted and used to detect ICs with
HTs. However, access to a golden IC is not always
feasible. Thus, a mechanism for HT detection is sought
that does not require the golden IC. Machine Learning
(ML) approaches have emerged to be extremely useful in
helping eliminate the need for a golden IC. Recent
works on utilizing ML for HT detection have been shown
to be promising in achieving this goal. Thus, in this
tutorial, we will explain utilizing ML as a solution to
the challenge of HT detection. Additionally, we will
describe the Electronic Design Automation (EDA) tool
flow for automating ML-assisted HT detection. Moreover,
to further discuss the benefits of ML-assisted HT
detection solutions, we will demonstrate a Neural
Network (NN)-assisted timing profiling method for HT
detection. Finally, we will discuss the shortcomings
and open challenges of ML-assisted HT detection
methods.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liang:2023:TRS,
author = "Tailin Liang and Lei Wang and Shaobo Shi and John
Glossner and Xiaotong Zhang",
title = "{TCX}: a {RISC} Style Tensor Computing Extension and a
Programmable Tensor Processor",
journal = j-TECS,
volume = "22",
number = "3",
pages = "47:1--47:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3568310",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3568310",
abstract = "Neural network processors and accelerators are
domain-specific architectures deployed to solve the
high computational requirements of deep learning
algorithms. This article proposes a new instruction set
extension for tensor computing, TCX, using Reduced
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "47",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dong:2023:RAS,
author = "Yi Dong and Wei Huang and Vibhav Bharti and Victoria
Cox and Alec Banks and Sen Wang and Xingyu Zhao and
Sven Schewe and Xiaowei Huang",
title = "Reliability Assessment and Safety Arguments for
Machine Learning Components in System Assurance",
journal = j-TECS,
volume = "22",
number = "3",
pages = "48:1--48:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3570918",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3570918",
abstract = "The increasing use of Machine Learning (ML) components
embedded in autonomous systems---so-called
Learning-Enabled Systems (LESs)---has resulted in the
pressing need to assure their functional safety. As for
traditional functional safety, the emerging \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yan:2023:MBD,
author = "Zujia Yan and Yi Zhuang and Weining Zheng and Jingjing
Gu",
title = "Multi-bit Data Flow Error Detection Method Based on
{SDC} Vulnerability Analysis",
journal = j-TECS,
volume = "22",
number = "3",
pages = "49:1--49:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3572838",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3572838",
abstract = "One of the most difficult data flow errors to detect
caused by single-event upsets in space radiation is the
Silent Data Corruption (SDC). To solve the problem of
multi-bit upsets causing program SDC, an instruction
multi-bit SDC vulnerability prediction \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "49",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kohler:2023:RCE,
author = "Leonie K{\"o}hler and Phil Hertha and Matthias Beckert
and Alex Bendrick and Rolf Ernst",
title = "Robust Cause-Effect Chains with Bounded Execution Time
and System-Level Logical Execution Time",
journal = j-TECS,
volume = "22",
number = "3",
pages = "50:1--50:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3573388",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3573388",
abstract = "In automotive and industrial real-time software
systems, the primary timing constraints relate to
cause-effect chains. A cause-effect chain is a sequence
of linked tasks and it typically implements the process
of reading sensor data, computing algorithms,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "50",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tuli:2023:CNA,
author = "Shikhar Tuli and Chia-Hao Li and Ritvik Sharma and
Niraj K. Jha",
title = "{CODEBench}: a Neural Architecture and Hardware
Accelerator Co-Design Framework",
journal = j-TECS,
volume = "22",
number = "3",
pages = "51:1--51:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3575798",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3575798",
abstract = "Recently, automated co-design of machine learning (ML)
models and accelerator architectures has attracted
significant attention from both the industry and
academia. However, most co-design frameworks either
explore a limited search space or employ \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "51",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yi:2023:EEE,
author = "Saehanseul Yi and Tae-Wook Kim and Jong-Chan Kim and
Nikil Dutt",
title = "{EASYR}: Energy-Efficient Adaptive System
Reconfiguration for Dynamic Deadlines in Autonomous
Driving on Multicore Processors",
journal = j-TECS,
volume = "22",
number = "3",
pages = "52:1--52:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3570503",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3570503",
abstract = "The increasing computing demands of autonomous driving
applications have driven the adoption of multicore
processors in real-time systems, which in turn renders
energy optimizations critical for reducing battery
capacity and vehicle weight. A typical \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "52",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zacharopoulos:2023:TEH,
author = "Georgios Zacharopoulos and Adel Ejjeh and Ying Jing
and En-Yu Yang and Tianyu Jia and Iulian Brumar and
Jeremy Intan and Muhammad Huzaifa and Sarita Adve and
Vikram Adve and Gu-Yeon Wei and David Brooks",
title = "{Trireme}: Exploration of Hierarchical Multi-level
Parallelism for Hardware Acceleration",
journal = j-TECS,
volume = "22",
number = "3",
pages = "53:1--53:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3580394",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3580394",
abstract = "The design of heterogeneous systems that include
domain specific accelerators is a challenging and
time-consuming process. While taking into account area
constraints, designers must decide which parts of an
application to accelerate in hardware and which
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "53",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lindemann:2023:RSS,
author = "Lars Lindemann and Lejun Jiang and Nikolai Matni and
George J. Pappas",
title = "Risk of Stochastic Systems for Temporal Logic
Specifications",
journal = j-TECS,
volume = "22",
number = "3",
pages = "54:1--54:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3580490",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3580490",
abstract = "The wide availability of data coupled with the
computational advances in artificial intelligence and
machine learning promise to enable many future
technologies such as autonomous driving. While there
has been a variety of successful demonstrations of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "54",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yin:2023:CBR,
author = "Jun Yin and Marian Verhelst",
title = "{CNN}-based Robust Sound Source Localization with
{SRP-PHAT} for the Extreme Edge",
journal = j-TECS,
volume = "22",
number = "3",
pages = "55:1--55:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3586996",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3586996",
abstract = "Robust sound source localization for environments with
noise and reverberation are increasingly exploiting
deep neural networks fed with various acoustic
features. Yet, state-of-the-art research mainly focuses
on optimizing algorithmic accuracy, resulting
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "55",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tabanelli:2023:DAY,
author = "Enrico Tabanelli and Giuseppe Tagliavini and Luca
Benini",
title = "{DNN} Is Not All You Need: Parallelizing Non-neural
{ML} Algorithms on Ultra-low-power {IoT} Processors",
journal = j-TECS,
volume = "22",
number = "3",
pages = "56:1--56:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3571133",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3571133",
abstract = "Machine Learning (ML) functions are becoming
ubiquitous in latency- and privacy-sensitive IoT
applications, prompting a shift toward near-sensor
processing at the extreme edge and the consequent
increasing adoption of Parallel Ultra-low-power (PULP)
IoT \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "56",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2023:EAD,
author = "Yirui Wu and Lilai Zhang and Zonghua Gu and Hu Lu and
Shaohua Wan",
title = "Edge-{AI}-Driven Framework with Efficient Mobile
Network Design for Facial Expression Recognition",
journal = j-TECS,
volume = "22",
number = "3",
pages = "57:1--57:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3587038",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3587038",
abstract = "Facial Expression Recognition (FER) in the wild poses
significant challenges due to realistic occlusions,
illumination, scale, and head pose variations of the
facial images. In this article, we propose an
Edge-AI-driven framework for FER. On the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "57",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Isik:2023:NNC,
author = "Berivan Isik and Kristy Choi and Xin Zheng and Tsachy
Weissman and Stefano Ermon and H.-S. Philip Wong and
Armin Alaghi",
title = "Neural Network Compression for Noisy Storage Devices",
journal = j-TECS,
volume = "22",
number = "3",
pages = "58:1--58:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3588436",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3588436",
abstract = "Compression and efficient storage of neural network
(NN) parameters is critical for applications that run
on resource-constrained devices. Despite the
significant progress in NN model compression, there has
been considerably less investigation in the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "58",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kloda:2023:LLS,
author = "Tomasz Kloda and Giovani Gracioli and Rohan Tabish and
Reza Mirosanlou and Renato Mancuso and Rodolfo
Pellizzoni and Marco Caccamo",
title = "Lazy Load Scheduling for Mixed-criticality
Applications in Heterogeneous {MPSoCs}",
journal = j-TECS,
volume = "22",
number = "3",
pages = "59:1--59:??",
month = may,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3587694",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Jul 3 08:20:15 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3587694",
abstract = "Newly emerging multiprocessor system-on-a-chip (MPSoC)
platforms provide hard processing cores with
programmable logic (PL) for high-performance computing
applications. In this article, we take a deep look into
these commercially available heterogeneous \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "59",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Parra:2023:TMV,
author = "Pablo Parra and Antonio {Da Silva} and Borja Losa and
J. Ignacio Garc{\'\i}a and {\'O}scar R. Polo and
Agust{\'\i}n Mart{\'\i}nez and Sebasti{\'a}n
S{\'a}nchez",
title = "Tailor-made Virtualization Monitor Design for {CPU}
Virtualization on {LEON} Processors",
journal = j-TECS,
volume = "22",
number = "4",
pages = "60:1--60:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3584702",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Aug 10 07:21:24 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3584702",
abstract = "In recent decades, mixed-criticality systems have been
widely adopted to reduce the complexity and development
times of real-time critical applications. In these
systems, applications run on a separation kernel
hypervisor, a software element that controls \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "60",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Papaioannou:2023:ULP,
author = "Alexios Papaioannou and Charalampos S. Kouzinopoulos
and Dimosthenis Ioannidis and Dimitrios Tzovaras",
title = "An Ultra-low-power Embedded {AI} Fire Detection and
Crowd Counting System for Indoor Areas",
journal = j-TECS,
volume = "22",
number = "4",
pages = "61:1--61:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3582433",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Aug 10 07:21:24 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3582433",
abstract = "Fire incidents in residential and industrial areas are
often the cause of human casualties and property
damage. Although there are existing systems that detect
fire and monitor the presence of people in indoor
areas, research on their implementation in \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "61",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bhattacharjee:2023:XEA,
author = "Abhiroop Bhattacharjee and Abhishek Moitra and
Priyadarshini Panda",
title = "{XploreNAS}: Explore Adversarially Robust and
Hardware-efficient Neural Architectures for Non-ideal
Xbars",
journal = j-TECS,
volume = "22",
number = "4",
pages = "62:1--62:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3593045",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Aug 10 07:21:24 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3593045",
abstract = "Compute In-Memory platforms such as memristive
crossbars are gaining focus as they facilitate
acceleration of Deep Neural Networks (DNNs) with high
area and compute efficiencies. However, the intrinsic
non-idealities associated with the analog nature of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "62",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gunzel:2023:CTA,
author = "Mario G{\"u}nzel and Kuan-Hsun Chen and Niklas Ueter
and Georg von der Br{\"u}ggen and Marco D{\"u}rr and
Jian-Jia Chen",
title = "Compositional Timing Analysis of Asynchronized
Distributed Cause-effect Chains",
journal = j-TECS,
volume = "22",
number = "4",
pages = "63:1--63:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3587036",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Aug 10 07:21:24 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3587036",
abstract = "Real-time systems require the formal guarantee of
timing constraints, not only for the individual tasks
but also for the end-to-end latency of data flows. The
data flow among multiple tasks, e.g., from sensors to
actuators, is described by a cause-effect \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "63",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shadab:2023:HHC,
author = "Rakin Muhammad Shadab and Yu Zou and Sanjay Gandham
and Amro Awad and Mingjie Lin",
title = "{HMT}: a Hardware-centric Hybrid Bonsai {Merkle} Tree
Algorithm for High-performance Authentication",
journal = j-TECS,
volume = "22",
number = "4",
pages = "64:1--64:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3595179",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Aug 10 07:21:24 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3595179",
abstract = "The Bonsai Merkle tree (BMT) is a widely used tree
structure for authentication of metadata such as
encryption counters in a secure computing system.
Common BMT algorithms were designed for traditional Von
Neumann architectures with a software-centric
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "64",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Min:2023:MTK,
author = "Donghyun Min and Kihyun Kim and Chaewon Moon and Awais
Khan and Seungjin Lee and Changhwan Yun and Woosuk
Chung and Youngjae Kim",
title = "A Multi-tenant Key-value {SSD} with Secondary Index
for Search Query Processing and Analysis",
journal = j-TECS,
volume = "22",
number = "4",
pages = "65:1--65:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3590153",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Aug 10 07:21:24 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3590153",
abstract = "Key-value SSDs (KVSSDs) introduced so far are limited
in their use as an alternative to the key-value store
running on the host due to the following technical
limitations. First, they were designed only for a
single tenant, limiting the use of multiple \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "65",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2023:OCS,
  author          = {Lin Zhang and Zifan Wang and Fanxin Kong},
  title           = {Optimal Checkpointing Strategy for Real-time Systems
                     with Both Logical and Timing Correctness},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {66:1--66:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3603172},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3603172},
  abstract        = {Real-time systems are susceptible to adversarial
                     factors such as faults and attacks, leading to severe
                     consequences. This paper presents an optimal checkpoint
                     scheme to bolster fault resilience in real-time
                     systems, addressing both logical consistency and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {66},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{West:2023:RTU,
author = "Richard West and Ahmad Golchin and Anton Njavro",
title = "Real-Time {USB} Networking and Device {I/O}",
journal = j-TECS,
volume = "22",
number = "4",
pages = "67:1--67:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3604429",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Aug 10 07:21:24 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3604429",
abstract = "Multicore PC-class embedded systems present an
opportunity to consolidate separate microcontrollers as
software-defined functions. For instance, an automotive
system with more than 100 electronic control units
(ECUs) could be replaced with one or, at most,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kohl:2023:MBD,
  author          = {Maximilian A. K{\"o}hl and Holger Hermanns},
  title           = {Model-Based Diagnosis of Real-Time Systems: Robustness
                     Against Varying Latency, Clock Drift, and Out-of-Order
                     Observations},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {68:1--68:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3597209},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3597209},
  abstract        = {Online fault diagnosis techniques are a key enabler of
                     effective failure mitigation. For real-time systems,
                     the problem of identifying faults is aggravated by
                     timing imprecisions such as varying latency between
                     events and their observation. This paper \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {68},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Terway:2023:RGM,
  author          = {Prerit Terway and Niraj K. Jha},
  title           = {{REPAIRS}: {Gaussian} Mixture Model-based Completion
                     and Optimization of Partially Specified Systems},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {69:1--69:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3605147},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3605147},
  abstract        = {Most system optimization techniques focus on finding
                     the values of the system components to achieve the best
                     performance. Searching over all component values gives
                     the search methodology the freedom to explore the
                     entire design space to determine the best \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {69},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Hsu:2023:GBC,
  author          = {Yao-Jen Hsu and Chin-Hsien Wu and Yu-Chieh Tsai and
                     Chia-Cheng Liu},
  title           = {A Granularity-Based Clustering Method for Reducing
                     Write Amplification in Solid-State Drives},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {70:1--70:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3605779},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3605779},
  abstract        = {In recent years, solid-state drives (SSDs) that adopt
                     NAND flash memory have been widely used as the main
                     storage devices. In particular, NAND flash memory has a
                     special feature of ``out-of-place'' updates to write
                     the up-to-date data to a free page, and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {70},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Niknafs:2023:RRM,
  author          = {Mina Niknafs and Petru Eles and Zebo Peng},
  title           = {Runtime Resource Management with Multiple-Step-Ahead
                     Workload Prediction},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {71:1--71:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3605213},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3605213},
  abstract        = {Modern embedded platforms need sophisticated resource
                     managers to utilize their heterogeneous computational
                     resources efficiently. Furthermore, such platforms are
                     subject to fluctuating workloads that are unforeseeable
                     at design time. Predicting the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {71},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Bosio:2023:SIA,
  author          = {Alberto Bosio and Lara Dolecek and Alexandra Kourfali
                     and Sri Parameswaran and Alessandro Savino},
  title           = {Special Issue: {``Approximation at the Edge''}},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {72:1--72:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3605757},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3605757},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {72},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Pradhan:2023:ETB,
  author          = {Chetana Pradhan and Martin Letras and J{\"u}rgen
                     Teich},
  title           = {Efficient Table-based Function Approximation on
                     {FPGAs} Using Interval Splitting and {BRAM}
                     Instantiation},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {73:1--73:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3580737},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/elefunt.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3580737},
  abstract        = {This article proposes a novel approach for the
                     generation of memory-efficient table-based function
                     approximation circuits for edge devices in general and
                     FPGAs in particular. Given a function $ f(x) $ to be
                     approximated in a given interval $ [x_0, x_0 + a) $ and
                     a maximum approximation error $ E_a $, the goal is to
                     determine a function table implementation with a
                     minimized memory footprint, i.e., number of entries
                     that need to be stored. Rather than state-of-the-art
                     work performing an equidistant sampling of the given
                     interval by so-called breakpoints and using linear
                     interpolation between two adjacent breakpoints to
                     determine $ f(x) $ at the maximum error bound, we
                     propose and compare three algorithms for splitting the
                     given interval into sub-intervals to reduce the
                     required memory footprint drastically based on the
                     observation that in sub-intervals of low gradient, a
                     coarser sampling grid may be assumed while guaranteeing
                     the maximum interpolation error bound $ E_a $.
                     Experiments on elementary mathematical functions show
                     that a large fraction in memory footprint may be saved.
                     Second, a hardware architecture implementing the
                     sub-interval selection, breakpoint lookup, and
                     interpolation at a latency of just 9 clock cycles is
                     introduced. Third, for each generated circuit design,
                     BRAMs are automatically instantiated rather than
                     synthesizing the reduced footprint function table using
                     LUT primitives, providing an additional degree of
                     resource efficiency. The approach presented here for
                     FPGAs can equally be applied to other circuit
                     technologies for fast and, at the same time,
                     memory-optimized function approximation at the edge.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {73},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Paul:2023:ANI,
author = "Sibendu Paul and Utsav Drolia and Y. Charlie Hu and
Srimat Chakradhar",
title = "{AQuA}: a New Image Quality Metric for Optimizing
Video Analytics Systems",
journal = j-TECS,
volume = "22",
number = "4",
pages = "74:1--74:??",
month = jul,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3568423",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Thu Aug 10 07:21:24 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3568423",
abstract = "Millions of cameras at the edge are being deployed to
power a variety of different deep learning
applications. However, the frames captured by these
cameras are not always pristine---they can be distorted
due to lighting issues, sensor noise, compression
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Leon-Vega:2023:AGR,
  author          = {Luis G. Le{\'o}n-Vega and Eduardo Salazar-Villalobos
                     and Alejandro Rodriguez-Figueroa and Jorge
                     Castro-God{\'\i}nez},
  title           = {Automatic Generation of Resource and Accuracy
                     Configurable Processing Elements},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {75:1--75:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3594540},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3594540},
  abstract        = {Low-power consumption and scarce computational
                     resources limit the computation at the edge. Besides,
                     the approximate computing paradigm reports promising
                     techniques for designing accelerators to deal with
                     inherent limitations of the edge, and high-level
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {75},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Awais:2023:TOS,
  author          = {Muhammad Awais and Ali Zahir and Syed Ayaz Ali Shah
                     and Pedro Reviriego and Anees Ullah and Nasim Ullah and
                     Adam Khan and Hazrat Ali},
  title           = {Toward Optimal Softcore Carry-aware Approximate
                     Multipliers on {Xilinx} {FPGAs}},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {76:1--76:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3564243},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3564243},
  abstract        = {Domain-specific accelerators for signal processing,
                     image processing, and machine learning are increasingly
                     being implemented on SRAM-based field-programmable gate
                     arrays (FPGAs). Owing to the inherent error tolerance
                     of such applications, approximate arithmetic
                     operations, in particular, the design of approximate
                     multipliers, have become an important research
                     problem. Truncation of lower bits is a widely used
                     approximation approach; however, analyzing and limiting
                     the effects of carry-propagation due to this
                     approximation has not been explored in detail yet. In
                     this article, an optimized carry-aware approximate
                     radix-4 Booth multiplier design is presented that
                     leverages the built-in slice look-up tables (LUTs) and
                     carry-chain resources in a novel configuration. The
                     proposed multiplier simplifies the computation of the
                     upper and lower bits and provides significant benefits
                     in terms of FPGA resource usage (LUTs saving
                     38.5\%--42.9\%), Power Delay Product (PDP saving
                     49.4\%--53\%), performance metric (LUTs $ \times $
                     critical path delay (CPD) $ \times $ PDP saving
                     68.9\%--73.1\%) and errors (70\% improvement in mean
                     relative error distance) compared to the latest
                     state-of-the-art designs. Therefore, the proposed
                     designs are an attractive choice to implement
                     multiplication on FPGA-based accelerators.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {76},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Ghosh:2023:EEA,
  author          = {Soumendu Kumar Ghosh and Arnab Raha and Vijay
                     Raghunathan},
  title           = {Energy-Efficient Approximate Edge Inference Systems},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {77:1--77:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589766},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589766},
  abstract        = {The rapid proliferation of the Internet of Things and
                     the dramatic resurgence of artificial intelligence
                     based application workloads have led to immense
                     interest in performing inference on energy-constrained
                     edge devices. Approximate computing (a design
                     \ldots{})},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {77},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Tsounis:2023:MFT,
  author          = {Ioannis Tsounis and Dimitris Agiakatsikas and Mihalis
                     Psarakis},
  title           = {A Methodology for Fault-tolerant {Pareto}-optimal
                     Approximate Designs of {FPGA}-based Accelerators},
  journal         = j-TECS,
  volume          = {22},
  number          = {4},
  pages           = {78:1--78:??},
  month           = jul,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3568021},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 10 07:21:24 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3568021},
  abstract        = {Approximate Computing Techniques (ACTs) take advantage
                     of resilience computing applications to trade off among
                     output precision, area, power, and performance. ACTs
                     can lead to significant gains at affordable costs when
                     efficiently implemented on Field \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {78},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Pan:2023:BBS,
  author          = {Yunjie Pan and Jiecao Yu and Andrew Lukefahr and
                     Reetuparna Das and Scott Mahlke},
  title           = {{BitSET}: Bit-Serial Early Termination for Computation
                     Reduction in Convolutional Neural Networks},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {98:1--98:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609093},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609093},
  abstract        = {Convolutional Neural Networks (CNNs) have demonstrated
                     remarkable performance across a wide range of machine
                     learning tasks. However, the high accuracy \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {98},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Yang:2023:EEP,
  author          = {Zhao Yang and Qingshuang Sun},
  title           = {Energy-efficient Personalized Federated Search with
                     Graph for Edge Computing},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {99:1--99:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609435},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609435},
  abstract        = {Federated Learning (FL) is a popular method for
                     privacy-preserving machine learning on edge devices.
                     However, the heterogeneity of edge devices, including
                     differences \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {99},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Wang:2023:EEM,
  author          = {Yitu Wang and Shiyu Li and Qilin Zheng and Andrew
                     Chang and Hai Li and Yiran Chen},
  title           = {{EMS-i}: an Efficient Memory System Design with
                     Specialized Caching Mechanism for Recommendation
                     Inference},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {100:1--100:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609384},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609384},
  abstract        = {Recommendation systems have been widely embedded into
                     many Internet services. For example, Meta's deep
                     learning recommendation model (DLRM) \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {100},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Sahoo:2023:ATS,
  author          = {Siva Satyendra Sahoo and Salim Ullah and Akash Kumar},
  title           = {{AxOTreeS}: a Tree Search Approach to Synthesizing
                     {FPGA}-based Approximate Operators},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {101:1--101:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609096},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609096},
  abstract        = {Approximate computing (AxC) provides the scope for
                     achieving disproportionate gains in a system's power,
                     performance, and area (PPA) metrics by leveraging an
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {101},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Afifi:2023:GGN,
  author          = {Salma Afifi and Febin Sunny and Amin Shafiee and Mahdi
                     Nikdast and Sudeep Pasricha},
  title           = {{GHOST}: a Graph Neural Network Accelerator using
                     Silicon Photonics},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {102:1--102:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609097},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609097},
  abstract        = {Graph neural networks (GNNs) have emerged as a
                     powerful approach for modelling and learning from
                     graph-structured data. Multiple fields have since
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {102},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Ren:2023:PWB,
  author          = {Jiankang Ren and Chunxiao Liu and Chi Lin and Ran Bi
                     and Simeng Li and Zheng Wang and Yicheng Qian and
                     Zhichao Zhao and Guozhen Tan},
  title           = {Protection Window Based Security-Aware Scheduling
                     against Schedule-Based Attacks},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {103:1--103:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609098},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609098},
  abstract        = {With widespread use of common-off-the-shelf components
                     and the drive towards connection with external
                     environments, the real-time systems are facing more
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {103},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Sha:2023:PSR,
  author          = {Zhibing Sha and Jiaojiao Wu and Jun Li and Balazs
                     Gerofi and Zhigang Cai and Jianwei Liao},
  title           = {Proactive Stripe Reconstruction to Improve Cache Use
                     Efficiency of {SSD}-Based {RAID} Systems},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {104:1--104:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609099},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609099},
  abstract        = {Solid-State Drives (SSDs) exhibit different failure
                     characteristics compared to conventional hard disk
                     drives. In particular, the Bit Error Rate (BER) of an
                     SSD \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {104},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Mousavi:2023:DDA,
  author          = {Hamid Mousavi and Mohammad Loni and Mina Alibeigi and
                     Masoud Daneshtalab},
  title           = {{DASS}: Differentiable Architecture Search for Sparse
                     Neural Networks},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {105:1--105:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609385},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609385},
  abstract        = {The deployment of Deep Neural Networks (DNNs) on edge
                     devices is hindered by the substantial gap between
                     performance requirements and available \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {105},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Clair:2023:SED,
  author          = {Judicael Clair and Guy Eichler and Luca P. Carloni},
  title           = {{SpikeHard}: Efficiency-Driven Neuromorphic Hardware
                     for Heterogeneous Systems-on-Chip},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {106:1--106:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609101},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609101},
  abstract        = {Neuromorphic computing is an emerging field with the
                     potential to offer performance and energy-efficiency
                     gains over traditional machine learning approaches.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {106},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Klashtorny:2023:PGW,
  author          = {Artem Klashtorny and Zhuanhao Wu and Anirudh Mohan
                     Kaushik and Hiren Patel},
  title           = {Predictable {GPU} Wavefront Splitting for
                     Safety-Critical Systems},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {107:1--107:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609102},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609102},
  abstract        = {We present a predictable wavefront splitting (PWS)
                     technique for graphics processing units (GPUs). PWS
                     improves the performance of GPU applications by
                     reducing \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {107},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Odema:2023:MMA,
  author          = {Mohanad Odema and Halima Bouzidi and Hamza Ouarnoughi
                     and Smail Niar and Mohammad Abdullah {Al Faruque}},
  title           = {{MaGNAS}: a Mapping-Aware Graph Neural Architecture
                     Search Framework for Heterogeneous {MPSoC} Deployment},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {108:1--108:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609386},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609386},
  abstract        = {Graph Neural Networks (GNNs) are becoming increasingly
                     popular for vision-based applications due to their
                     intrinsic capacity in modeling structural and
                     contextual \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {108},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Mondal:2023:PPR,
  author          = {Anupam Mondal and Shreya Gangopadhyay and Durba
                     Chatterjee and Harishma Boyapally and Debdeep
                     Mukhopadhyay},
  title           = {{PReFeR}: Physically Related Function based Remote
                     Attestation Protocol},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {109:1--109:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609104},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609104},
  abstract        = {Remote attestation is a request-response based
                     security service that permits a trusted entity
                     (verifier) to check the current state of an untrusted
                     remote \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {109},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Ikeda:2023:MDD,
  author          = {Sosei Ikeda and Hiromitsu Awano and Takashi Sato},
  title           = {Modular {DFR}: Digital Delayed Feedback Reservoir
                     Model for Enhancing Design Flexibility},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {110:1--110:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609105},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609105},
  abstract        = {A delayed feedback reservoir (DFR) is a type of
                     reservoir computing system well-suited for hardware
                     implementations owing to its simple structure. Most
                     existing \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {110},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Mishra:2023:VVA,
  author          = {Vishesh Mishra and Sparsh Mittal and Neelofar Hassan
                     and Rekha Singhal and Urbi Chatterjee},
  title           = {{VADF}: Versatile Approximate Data Formats for
                     Energy-Efficient Computing},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {111:1--111:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609106},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609106},
  abstract        = {Approximate computing (AC) techniques provide overall
                     performance gains in terms of power and energy savings
                     at the cost of minor loss in application accuracy.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {111},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Halder:2023:OPN,
  author          = {Dipal Halder and Maneesh Merugu and Sandip Ray},
  title           = {{ObNoCs}: Protecting Network-on-Chip Fabrics Against
                     Reverse-Engineering Attacks},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {112:1--112:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609107},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609107},
  abstract        = {Modern System-on-Chip designs typically use
                     Network-on-Chip (NoC) fabrics to implement coordination
                     among integrated hardware blocks. An important
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {112},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Basaklar:2023:DDT,
  author          = {Toygun Basaklar and A. Alper Goksoy and Anish
                     Krishnakumar and Suat Gumussoy and Umit Y. Ogras},
  title           = {{DTRL}: Decision Tree-based Multi-Objective
                     Reinforcement Learning for Runtime Task Scheduling in
                     Domain-Specific System-on-Chips},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {113:1--113:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609108},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609108},
  abstract        = {Domain-specific systems-on-chip (DSSoCs) combine
                     general-purpose processors and specialized hardware
                     accelerators to improve performance and energy
                     efficiency \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {113},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Juang:2023:LCG,
  author          = {Tzung-Han Juang and Christof Schlaak and Christophe
                     Dubach},
  title           = {Let Coarse-Grained Resources Be Shared: Mapping Entire
                     Neural Networks on {FPGAs}},
  journal         = j-TECS,
  volume          = {22},
  number          = {5s},
  pages           = {114:1--114:??},
  month           = oct,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3609109},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3609109},
  abstract        = {Traditional High-Level Synthesis (HLS) provides rapid
                     prototyping of hardware accelerators without coding
                     with Hardware Description Languages (HDLs). \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {114},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}
@Article{Bakshi:2023:CED,
author = "Suyash Bakshi and Lennart Johnsson",
title = "Computationally Efficient {DNN} Mapping Search
Heuristic using Deep Reinforcement Learning",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "115:1--115:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609110",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609110",
abstract = "In this work, we present a computationally efficient
Reinforcement Learning mapping search heuristic for
finding high quality mappings for N-dimensional
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "115",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hussein:2023:CNC,
author = "Dina Hussein and Ganapati Bhat",
title = "{CIM}: a Novel Clustering-based Energy-Efficient Data
Imputation Method for Human Activity Recognition",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "116:1--116:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609111",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609111",
abstract = "Human activity recognition (HAR) is an important
component in a number of health applications, including
rehabilitation, Parkinson's disease, daily activity
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "116",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ravi:2023:VLV,
author = "Akshara Ravi and Vivek Chaturvedi and Muhammad
Shafique",
title = "{ViT4Mal}: Lightweight Vision Transformer for Malware
Detection on Edge Devices",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "117:1--117:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609112",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609112",
abstract = "There has been a tremendous growth of edge devices
connected to the network in recent years. Although
these devices make our life simpler and smarter, they
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "117",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Deb:2023:ZDT,
author = "Dipika Deb and John Jose",
title = "{ZPP}: a Dynamic Technique to Eliminate Cache
Pollution in {NoC} based {MPSoCs}",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "118:1--118:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609113",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609113",
abstract = "Data prefetching efficiently reduces the memory access
latency in NUCA architectures as the Last Level Cache
(LLC) is shared and distributed across \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "118",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2023:WTM,
author = "Shin-Ting Wu and Liang-Chi Chen and Po-Chun Huang and
Yuan-Hao Chang and Chien-Chung Ho and Wei-Kuan Shih",
title = "{WARM}-tree: Making Quadtrees Write-efficient and
Space-economic on Persistent Memories",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "119:1--119:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608033",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608033",
abstract = "Recently, the value of data has been widely
recognized, which highlights the significance of
data-centric computing in diversified application
scenarios. In many \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "119",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shen:2023:TMS,
author = "Yixian Shen and Leo Schreuders and Anuj Pathania and
Andy D. Pimentel",
title = "Thermal Management for {$3$D}-Stacked Systems via
Unified Core-Memory Power Regulation",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "120:1--120:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608040",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608040",
abstract = "3D-stacked processor-memory systems stack memory (DRAM
banks) directly on top of logic (CPU cores) using
chiplet-on-chiplet packaging technology to provide
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "120",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ponzina:2023:OFC,
author = "Flavio Ponzina and Marco Rios and Alexandre Levisse
and Giovanni Ansaloni and David Atienza",
title = "Overflow-free Compute Memories for Edge {AI}
Acceleration",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "121:1--121:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609387",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609387",
abstract = "Compute memories are memory arrays augmented with
dedicated logic to support arithmetic. They support the
efficient execution of data-centric \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "121",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Vali:2023:BSD,
author = "Kourosh Vali and Ata Vafi and Begum Kasap and Soheil
Ghiasi",
title = "{BASS}: Safe Deep Tissue Optical Sensing for Wearable
Embedded Systems",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "122:1--122:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607916",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607916",
abstract = "In wearable optical sensing applications whose target
tissue is not superficial, such as deep tissue
oximetry, the task of embedded system design has to
strike a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "122",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Huai:2023:CCR,
author = "Shuo Huai and Hao Kong and Xiangzhong Luo and Shiqing
Li and Ravi Subramaniam and Christian Makaya and Qian
Lin and Weichen Liu",
title = "{CRIMP}: Compact \& Reliable {DNN} Inference on
In-Memory Processing via Crossbar-Aligned Compression
and Non-ideality Adaptation",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "123:1--123:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609115",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609115",
abstract = "Crossbar-based In-Memory Processing (IMP) accelerators
have been widely adopted to achieve high-speed and
low-power computing, especially for deep \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "123",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yen:2023:KBR,
author = "Chih-Hsuan Yen and Hashan Roshantha Mendis and Tei-Wei
Kuo and Pi-Cheng Hsiu",
title = "Keep in Balance: Runtime-reconfigurable Intermittent
Deep Inference",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "124:1--124:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607918",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607918",
abstract = "Intermittent deep neural network (DNN) inference is a
promising technique to enable intelligent applications
on tiny devices powered by ambient energy \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "124",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gufran:2023:FHR,
author = "Danish Gufran and Sudeep Pasricha",
title = "{FedHIL}: Heterogeneity Resilient Federated Learning
for Robust Indoor Localization with Mobile Devices",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "125:1--125:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607919",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607919",
abstract = "Indoor localization plays a vital role in applications
such as emergency response, warehouse management, and
augmented reality experiences. By deploying \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "125",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xia:2023:SPS,
author = "Chengpeng Xia and Yawen Chen and Haibo Zhang and
Jigang Wu",
title = "{STADIA}: Photonic Stochastic Gradient Descent for
Neural Network Accelerators",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "126:1--126:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607920",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607920",
abstract = "Deep Neural Networks (DNNs) have demonstrated great
success in many fields such as image recognition and
text analysis. However, the ever-increasing sizes
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "126",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chang:2023:LEL,
author = "Jung-Hsiu Chang and Tzu-Yu Chang and Yi-Chao Shih and
Tseng-Yi Chen",
title = "{LaDy}: Enabling Locality-aware Deduplication
Technology on Shingled Magnetic Recording Drives",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "127:1--127:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607921",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607921",
abstract = "The continuous increase in data volume has led to the
adoption of shingled-magnetic recording (SMR) as the
primary technology for modern storage drives. This
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "127",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lien:2023:FFS,
author = "Yi-Han Lien and Yen-Ting Chen and Yuan-Hao Chang and
Yu-Pei Liang and Wei-Kuan Shih",
title = "{FSIMR}: File-system-aware Data Management for
Interlaced Magnetic Recording",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "128:1--128:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607922",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607922",
abstract = "Interlaced Magnetic Recording (IMR) is an emerging
recording technology for hard-disk drives (HDDs) that
provides larger storage capacity at a lower cost. By
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "128",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2023:IIE,
author = "Wentong Li and Liang Shi and Hang Li and Changlong Li
and Edwin Hsing-Mean Sha",
title = "{IOSR}: Improving {I/O} Efficiency for Memory Swapping
on Mobile Devices Via Scheduling and Reshaping",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "129:1--129:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607923",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607923",
abstract = "Mobile systems and applications are becoming
increasingly feature-rich and powerful, which
constantly suffer from memory pressure, especially for
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "129",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Modi:2023:CRR,
author = "Garima Modi and Aritra Bagchi and Neetu Jindal and
Ayan Mandal and Preeti Ranjan Panda",
title = "{CABARRE}: Request Response Arbitration for Shared
Cache Management",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "130:1--130:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608096",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608096",
abstract = "Modern multi-processor systems-on-chip (MPSoCs) are
characterized by caches shared by multiple cores. These
shared caches receive requests issued by the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "130",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ahmed:2023:SAH,
author = "Soyed Tuhin Ahmed and Kamal Danouchi and Michael
Hefenbrock and Guillaume Prenat and Lorena Anghel and
Mehdi B. Tahoori",
title = "{SpinBayes}: Algorithm-Hardware Co-Design for
Uncertainty Estimation Using {Bayesian} In-Memory
Approximation on Spintronic-Based Architectures",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "131:1--131:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609116",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609116",
abstract = "Recent development in neural networks (NNs) has led to
their widespread use in critical and automated
decision-making systems, where uncertainty \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "131",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sharma:2023:FCD,
author = "Harsh Sharma and Lukas Pfromm and Rasit Onur Topaloglu
and Janardhan Rao Doppa and Umit Y. Ogras and Ananth
Kalyanaraman and Partha Pratim Pande",
title = "Florets for Chiplets: Data Flow-aware High-Performance
and Energy-efficient Network-on-Interposer for {CNN}
Inference Tasks",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "132:1--132:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608098",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608098",
abstract = "Recent advances in 2.5D chiplet platforms provide a
new avenue for compact scale-out implementations of
emerging compute- and data-intensive applications
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "132",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Nassar:2023:APM,
author = "Hassan Nassar and Lars Bauer and J{\"o}rg Henkel",
title = "{ANV-PUF}: Machine-Learning-Resilient {NVM}-Based
Arbiter {PUF}",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "133:1--133:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609388",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609388",
abstract = "Physical Unclonable Functions (PUFs) have been widely
considered an attractive security primitive. They use
the deviations in the fabrication process to have
unique \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "133",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sorrentino:2023:HCA,
author = "Giuseppe Sorrentino and Marco Venere and Davide
Conficconi and Eleonora D'Arnese and Marco Domenico
Santambrogio",
title = "{Hephaestus}: Codesigning and Automating {$3$D} Image
Registration on Reconfigurable Architectures",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "134:1--134:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607928",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607928",
abstract = "Healthcare is a pivotal research field, and medical
imaging is crucial in many applications. Therefore
finding new architectural and algorithmic \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "134",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Tuncel:2023:SSC,
author = "Yigit Tuncel and Toygun Basaklar and Dina
Carpenter-Graffy and Umit Y. Ogras",
title = "A Self-Sustained {CPS} Design for Reliable Wildfire
Monitoring",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "135:1--135:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608100",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608100",
abstract = "Continuous monitoring of areas nearby the electric
grid is critical for preventing and early detection of
devastating wildfires. Existing wildfire monitoring
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "135",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lohar:2023:SMF,
author = "Debasmita Lohar and Clothilde Jeangoudoux and
Anastasia Volkova and Eva Darulova",
title = "Sound Mixed Fixed-Point Quantization of Neural
Networks",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "136:1--136:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609118",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609118",
abstract = "Neural networks are increasingly being used as
components in safety-critical applications, for
instance, as controllers in embedded systems. Their
formal \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "136",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bourke:2023:VCS,
author = "Timothy Bourke and Basile Pesin and Marc Pouzet",
title = "Verified Compilation of Synchronous Dataflow with
State Machines",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "137:1--137:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608102",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608102",
abstract = "Safety-critical embedded software is routinely
programmed in block-diagram languages. Recent work in
the V{\'e}lus project specifies such a language and its
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "137",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lee:2023:CVA,
author = "Edward A. Lee and Ravi Akella and Soroush Bateni and
Shaokai Lin and Marten Lohstroh and Christian Menard",
title = "Consistency vs. Availability in Distributed
Cyber-Physical Systems",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "138:1--138:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609119",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609119",
abstract = "In distributed applications, Brewer's CAP theorem
tells us that when networks become partitioned (P), one
must give up either consistency (C) or availability
(A). \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "138",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Peeck:2023:IWC,
author = "Jonas Peeck and Rolf Ernst",
title = "Improving Worst-case {TSN} Communication Times of
Large Sensor Data Samples by Exploiting
Synchronization",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "139:1--139:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609120",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609120",
abstract = "Higher levels of automated driving also require a more
sophisticated environmental perception. Therefore, an
increasing number of sensors transmit their data
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "139",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chou:2023:RSK,
author = "Yi-Quan Chou and Lin-Wei Shen and Li-Pin Chang",
title = "Rectifying Skewed Kernel Page Reclamation in Mobile
Devices for Improving User-Perceivable Latency",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "140:1--140:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607937",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607937",
abstract = "A crucial design factor for users of smart mobile
devices is the latency of graphical interface
interaction. Switching a background app to foreground
is a frequent \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "140",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Majumdar:2023:NAB,
author = "Rupak Majumdar and Mahmoud Salamati and Sadegh
Soudjani",
title = "Neural Abstraction-Based Controller Synthesis and
Deployment",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "141:1--141:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608104",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608104",
abstract = "Abstraction-based techniques are an attractive
approach for synthesizing correct-by-construction
controllers to satisfy high-level temporal
requirements. A \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "141",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Khan:2023:DDL,
author = "Osama Khan and Gwanjong Park and Euiseong Seo",
title = "{DaCapo}: an On-Device Learning Scheme for
Memory-Constrained Embedded Systems",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "142:1--142:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609121",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609121",
abstract = "The use of deep neural network (DNN) applications in
microcontroller unit (MCU) embedded systems is getting
popular. However, the DNN models in such \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "142",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gunzel:2023:PRT,
author = "Mario G{\"u}nzel and Niklas Ueter and Kuan-Hsun Chen
and Georg von der Br{\"u}ggen and Jian-Jia Chen",
title = "Probabilistic Reaction Time Analysis",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "143:1--143:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609390",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609390",
abstract = "In many embedded systems, for instance, in the
automotive, avionic, or robotics domain, critical
functionalities are implemented via chains of
communicating recurrent \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "143",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Vreman:2023:SAC,
author = "Nils Vreman and Martina Maggio",
title = "Stochastic Analysis of Control Systems Subject to
Communication and Computation Faults",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "144:1--144:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609123",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609123",
abstract = "Control theory allows one to design controllers that
are robust to external disturbances, model
simplification, and modelling inaccuracy. Researchers
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "144",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zheng:2023:IIA,
author = "Yongchun Zheng and Changlong Li and Yi Xiong and
Weihong Liu and Cheng Ji and Zongwei Zhu and Lichen
Yu",
title = "{iAware}: Interaction Aware Task Scheduling for
Reducing Resource Contention in Mobile Systems",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "145:1--145:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609391",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609391",
abstract = "To ensure the user experience of mobile systems, the
foreground application can be differentiated to
minimize the impact of background applications.
However, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "145",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhao:2023:FSN,
author = "Hanrui Zhao and Niuniu Qi and Lydia Dehbi and Xia Zeng
and Zhengfeng Yang",
title = "Formal Synthesis of Neural Barrier Certificates for
Continuous Systems via Counterexample Guided Learning",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "146:1--146:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609125",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609125",
abstract = "This paper presents a novel approach to safety
verification based on neural barrier certificates
synthesis for continuous dynamical systems. We
construct \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "146",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Loveless:2023:CML,
author = "Andrew Loveless and Linh Thi Xuan Phan and Lisa
Erickson and Ronald Dreslinski and Baris Kasikci",
title = "{CrossTalk}: Making Low-Latency Fault Tolerance Cheap
by Exploiting Redundant Networks",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "147:1--147:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609436",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609436",
abstract = "Real-time embedded systems perform many important
functions in the modern world. A standard way to
tolerate faults in these systems is with Byzantine
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "147",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shijubo:2023:PBB,
author = "Junya Shijubo and Masaki Waga and Kohei Suenaga",
title = "Probabilistic Black-Box Checking via Active {MDP}
Learning",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "148:1--148:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609127",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609127",
abstract = "We introduce a novel methodology for testing
stochastic black-box systems, frequently encountered in
embedded systems. Our approach enhances \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "148",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Singh:2023:KWC,
author = "Nikhilesh Singh and Karthikeyan Renganathan and
Chester Rebeiro and Jithin Jose and Ralph Mader",
title = "{Kryptonite}: Worst-Case Program Interference
Estimation on Multi-Core Embedded Systems",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "149:1--149:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609128",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609128",
abstract = "Due to the low costs and energy needed, cyber-physical
systems are adopting multi-core processors for their
embedded computing requirements. In order \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "149",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Baruah:2023:OSR,
author = "Sanjoy Baruah and Alan Burns and Robert Ian Davis",
title = "Optimal Synthesis of Robust {IDK} Classifier
Cascades",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "150:1--150:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609129",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609129",
abstract = "An IDK classifier is a computing component that
categorizes inputs into one of a number of classes, if
it is able to do so with the required level of
confidence, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "150",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Brun:2023:EDA,
author = "L{\'e}lio Brun and Christophe Garion and
Pierre-Lo{\"\i}c Garoche and Xavier Thirioux",
title = "Equation-Directed Axiomatization of {Lustre} Semantics
to Enable Optimized Code Validation",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "151:1--151:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609393",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609393",
abstract = "Model-based design tools like SCADE Suite and Simulink
are often used to design safety-critical embedded
software. Consequently, generating correct \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "151",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Colaco:2023:CSB,
author = "Jean-Louis Cola{\c{c}}o and Michael Mendler and
Baptiste Pauget and Marc Pouzet",
title = "A Constructive State-based Semantics and Interpreter
for a Synchronous Data-flow Language with State
Machines",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "152:1--152:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609131",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609131",
abstract = "Scade is a domain-specific synchronous functional
language used to implement safety-critical real-time
software for more than twenty years. Two main
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "152",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Thilakasiri:2023:MRP,
author = "Thilanka Thilakasiri and Matthias Becker",
title = "Methods to Realize Preemption in Phased Execution
Models",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "153:1--153:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609132",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609132",
abstract = "Phased execution models are a good solution to tame
the increased complexity and contention of commercial
off-the-shelf (COTS) multi-core platforms, e.g.,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "153",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Szeto:2023:BAB,
author = "Matthew Szeto and Edward Andert and Aviral Shrivastava
and Martin Reisslein and Chung-Wei Lin and Christ
Richmond",
title = "{B-AWARE}: Blockage Aware {RSU} Scheduling for {5G}
Enabled Autonomous Vehicles",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "154:1--154:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609133",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609133",
abstract = "5G Millimeter Wave (mmWave) technology holds great
promise for Connected Autonomous Vehicles (CAVs) due to
its ability to achieve data rates in the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "154",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lin:2023:TBV,
author = "Shaokai Lin and Yatin A. Manerkar and Marten Lohstroh
and Elizabeth Polgreen and Sheng-Jung Yu and Chadlia
Jerad and Edward A. Lee and Sanjit A. Seshia",
title = "Towards Building Verifiable {CPS} using {Lingua
Franca}",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "155:1--155:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609134",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609134",
abstract = "Formal verification of cyber-physical systems (CPS) is
challenging because it has to consider real-time and
concurrency aspects that are often absent in \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "155",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bartocci:2023:MHU,
author = "Ezio Bartocci and Cristinel Mateis and Eleonora
Nesterini and Dejan Ni{\v{c}}kovi{\'c}",
title = "Mining Hyperproperties using Temporal Logics",
journal = j-TECS,
volume = "22",
number = "5s",
pages = "156:1--156:??",
month = oct,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609394",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Sep 18 08:59:39 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609394",
abstract = "Formal specifications are essential to express
precisely systems, but they are often difficult to
define or unavailable. Specification mining aims to
automatically infer \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "156",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Metz:2023:BBS,
author = "David Metz and Vineet Kumar and Magnus Sj{\"a}lander",
title = "{BISDU}: a Bit-Serial Dot-Product Unit for
Microcontrollers",
journal = j-TECS,
volume = "22",
number = "5",
pages = "79:1--79:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608447",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/risc-v.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608447",
abstract = "Low-precision quantized neural networks (QNNs) reduce
the required memory space, bandwidth, and computational
power, and hence are suitable for deployment in
applications such as IoT edge devices. Mixed-precision
QNNs, where weights commonly have lower precision than
activations or different precision is used for
different layers, can limit the accuracy loss caused by
low-bit quantization, while still benefiting from
reduced memory footprint and faster execution. Previous
multiple-precision functional units supporting 8-bit,
4-bit, and 2-bit SIMD instructions have limitations,
such as large area overhead, under-utilization of
multipliers, and wasted memory space for low and mixed
bit-width operations.\par
This article introduces BISDU, a bit-serial dot-product
unit to support and accelerate execution of
mixed-precision low-bit QNNs on resource-constrained
microcontrollers. BISDU is a multiplier-less
dot-product unit, with frugal hardware requirements (a
population count unit and 2:1 multiplexers). The
proposed bit-serial dot-product unit leverages the
conventional logical operations of a microcontroller to
perform multiplications, which enables efficient
software implementations of binary (Xnor), ternary
(Xor), and mixed-precision [W $ \times $ A] (And)
dot-product operations.\par
The experimental results show that BISDU achieves
competitive performance compared to two
state-of-the-art units, XpulpNN and Dustin, when
executing low-bit-width CNNs. We demonstrate the
advantage that bit-serial execution provides by
enabling trading accuracy against weight footprint and
execution time. BISDU increases the area of the ALU by
68\% and the ALU power consumption by 42\% compared to
a baseline 32-bit RISC-V (RV32IC) microcontroller core.
In comparison, XpulpNN and Dustin increase the area by
6.9$ \times $ and 11.1$ \times $ and the power
consumption by 3.8$ \times $ and 5.97$ \times $,
respectively. The bit-serial state-of-the-art, based on
a conventional popcount instruction, increases the area
by 42\% and power by 32\%, with BISDU providing a 37\%
speedup over it.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kwon:2023:LRT,
author = "Hyeokdong Kwon and Hyunjun Kim and Minjoo Sim and
Wai-Kong Lee and Hwajeong Seo",
title = "Look-up the Rainbow: Table-based Implementation of
Rainbow Signature on 64-bit {ARMv8} Processors",
journal = j-TECS,
volume = "22",
number = "5",
pages = "80:1--80:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607140",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607140",
abstract = "The Rainbow Signature Scheme is one of the finalists
in the National Institute of Standards and Technology
(NIST) Post-Quantum Cryptography (PQC) standardization
competition, but failed to win because it has lack of
stability in the parameter selection \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "80",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Schneider:2023:CCM,
author = "Klaus Schneider and Anoop Bhagyanath",
title = "Consistency Constraints for Mapping Dataflow Graphs to
Hybrid Dataflow\slash {von Neumann} Architectures",
journal = j-TECS,
volume = "22",
number = "5",
pages = "81:1--81:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607869",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607869",
abstract = "Dataflow process networks (DPNs) provide a convenient
model of computation that is often used to model system
behavior in model-based designs. With fixed sets of
nodes, they are also used as dataflow graphs as an
intermediate program representation by \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "81",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Caronti:2023:FGH,
author = "Luca Caronti and Khakim Akhunov and Matteo Nardello
and Kasim Sinan Yildirim and Davide Brunelli",
title = "Fine-grained Hardware Acceleration for Efficient
Batteryless Intermittent Inference on the Edge",
journal = j-TECS,
volume = "22",
number = "5",
pages = "82:1--82:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3608475",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3608475",
abstract = "Backing up the intermediate results of
hardware-accelerated deep inference is crucial to
ensure the progress of execution on batteryless
computing platforms. However, hardware accelerators in
low-power AI platforms only support the one-shot atomic
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "82",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lei:2023:FII,
author = "Douwei Lei and Debiao He and Cong Peng and Min Luo and
Zhe Liu and Xinyi Huang",
title = "Faster Implementation of Ideal Lattice-Based
Cryptography Using {AVX512}",
journal = j-TECS,
volume = "22",
number = "5",
pages = "83:1--83:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609223",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609223",
abstract = "With the development of quantum computing, the
existing cryptography schemes based on classical
cryptographic primitives will no longer be secure.
Hence, cryptographers are designing post-quantum
cryptographic (PQC) schemes, and ideal lattice-based
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "83",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2023:RCR,
author = "Wei-Ju Chen and Peng Wu and Pei-Chi Huang and Aloysius
K. Mok and Song Han",
title = "Regular Composite Resource Partitioning and
Reconfiguration in Open Systems",
journal = j-TECS,
volume = "22",
number = "5",
pages = "84:1--84:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609424",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609424",
abstract = "We consider the problem of resource provisioning for
real-time cyber-physical applications in an open system
environment where there does not exist a global
resource scheduler that has complete knowledge of the
real-time performance requirements of each \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "84",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Inagaki:2023:PSC,
author = "Saya Inagaki and Mingyu Yang and Yang Li and Kazuo
Sakiyama and Yuko Hara-Azumi",
title = "Power Side-channel Attack Resistant Circuit Designs of
{ARX} Ciphers Using High-level Synthesis",
journal = j-TECS,
volume = "22",
number = "5",
pages = "85:1--85:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609507",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609507",
abstract = "In the Internet of Things (IoT) era, edge devices have
been considerably diversified and are often designed
using high-level synthesis (HLS) for improved design
productivity. However, HLS tools were originally
developed in a security-unaware manner, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "85",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Luo:2023:SEF,
author = "Yuling Luo and Shiqi Zhang and Shunsheng Zhang and
Junxiu Liu and Yanhu Wang and Su Yang",
title = "A Secure and Efficient Framework for Outsourcing
Large-scale Matrix Determinant and Linear Equations",
journal = j-TECS,
volume = "22",
number = "5",
pages = "86:1--86:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3611014",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3611014",
abstract = "Large-scale matrix determinants and linear equations
are two basic computational tools in science and
engineering fields. However, it is difficult for a
resource-constrained client to solve large-scale
computational tasks. Cloud computing service provides
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Calsi:2023:IRA,
author = "Davide {Li Calsi} and Vittorio Zaccaria",
title = "Interruptible Remote Attestation of Low-end {IoT}
Microcontrollers via Performance Counters",
journal = j-TECS,
volume = "22",
number = "5",
pages = "87:1--87:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3611674",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3611674",
abstract = "Remote attestation is a method used in distributed
systems to detect integrity violations on a target
device (prover) through a challenge-response protocol
initiated by a verifier device. The prover calculates a
hash of its memory, which is compared to a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Indrusiak:2023:RTG,
author = "Leandro Soares Indrusiak and Alan Burns",
title = "Real-Time Guarantees in Routerless Networks-on-Chip",
journal = j-TECS,
volume = "22",
number = "5",
pages = "88:1--88:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3616539",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Mon Oct 2 15:31:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3616539",
abstract = "This article considers the use of routerless
networks-on-chip as an alternative on-chip interconnect
for multi-processor systems requiring hard real-time
guarantees for inter-processor communication. It
presents a novel analytical framework that can
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liang:2023:SIA,
author = "Yun (Eric) Liang and Wei Zhang and Stephen
Neuendorffer and Wayne Luk",
title = "Special Issue: {``AI Acceleration on FPGAs''}",
journal = j-TECS,
volume = "22",
number = "6",
pages = "89:1--89:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3626323",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3626323",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "89",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hu:2023:HPR,
author = "Xianghong Hu and Hongmin Huang and Xueming Li and Xin
Zheng and Qinyuan Ren and Jingyu He and Xiaoming
Xiong",
title = "High-performance Reconfigurable {DNN} Accelerator on a
Bandwidth-limited Embedded System",
journal = j-TECS,
volume = "22",
number = "6",
pages = "90:1--90:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3530818",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3530818",
abstract = "Deep convolutional neural networks (DNNs) have been
widely used in many applications, particularly in
machine vision. It is challenging to accelerate DNNs on
embedded systems because real-world machine vision
applications should reserve a lot of external
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "90",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2023:FCF,
author = "Xiaoyang Wang and Zhe Zhou and Zhihang Yuan and
Jingchen Zhu and Yulong Cao and Yao Zhang and Kangrui
Sun and Guangyu Sun",
title = "{FD-CNN}: a Frequency-Domain {FPGA} Acceleration
Scheme for {CNN}-Based Image-Processing Applications",
journal = j-TECS,
volume = "22",
number = "6",
pages = "91:1--91:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3559105",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3559105",
abstract = "In the emerging edge-computing scenarios, FPGAs have
been widely adopted to accelerate convolutional neural
network (CNN)-based image-processing applications, such
as image classification, object detection, and image
segmentation, and so on. A standard \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "91",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ma:2023:ICD,
author = "Zhengzheng Ma and Tuo Dai and Xuechao Wei and Guojie
Luo",
title = "An Intermediate-Centric Dataflow for Transposed
Convolution Acceleration on {FPGA}",
journal = j-TECS,
volume = "22",
number = "6",
pages = "92:1--92:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3561053",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3561053",
abstract = "Transposed convolution has been prevailing in
convolutional neural networks (CNNs), playing an
important role in multiple scenarios such as image
segmentation and back-propagation process of training
CNNs. This mainly benefits from the ability to up-sample
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "92",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ye:2023:AAM,
author = "Wenhua Ye and Xu Zhou and Joey Zhou and Cen Chen and
Kenli Li",
title = "Accelerating Attention Mechanism on {FPGAs} based on
Efficient Reconfigurable Systolic Array",
journal = j-TECS,
volume = "22",
number = "6",
pages = "93:1--93:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3549937",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3549937",
abstract = "Transformer model architectures have recently received
great interest in natural language, machine
translation, and computer vision, where attention
mechanisms are their building blocks. However, the
attention mechanism is expensive because of its
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "93",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Alam:2023:RIF,
author = "Syed Asad Alam and David Gregg and Giulio Gambardella
and Thomas Preusser and Michaela Blott",
title = "On the {RTL} Implementation of {FINN} Matrix Vector
Unit",
journal = j-TECS,
volume = "22",
number = "6",
pages = "94:1--94:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3547141",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3547141",
abstract = "Field-programmable gate array (FPGA)-based
accelerators are becoming increasingly popular for deep
neural network (DNN) inference due to their ability to
scale performance with increasing degrees of
specialization with dataflow architectures or custom
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "94",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Feng:2023:ADS,
author = "Kaijie Feng and Xiaoya Fan and Jianfeng An and Chuxi
Li and Kaiyue Di and Jiangfei Li",
title = "{ACDSE}: a Design Space Exploration Method for {CNN}
Accelerator based on Adaptive Compression Mechanism",
journal = j-TECS,
volume = "22",
number = "6",
pages = "95:1--95:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3545177",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3545177",
abstract = "Customized accelerators for Convolutional Neural
Network (CNN) can achieve better energy efficiency than
general computing platforms. However, the design of a
high-performance accelerator should take into account a
variety of parameters and physical \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "95",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shu:2023:TID,
author = "Jiwu Shu and Kedong Fang and Youmin Chen and Shuo
Wang",
title = "{TH-iSSD}: Design and Implementation of a Generic and
Reconfigurable Near-Data Processing Framework",
journal = j-TECS,
volume = "22",
number = "6",
pages = "96:1--96:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3563456",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3563456",
abstract = "We present the design and implementation of TH-iSSD, a
near-data processing framework to address the data
movement problem. TH-iSSD does not pose any restriction
to the hardware selection and is highly
reconfigurable---its core components, such as the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "96",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fu:2023:RRB,
author = "Yu Fu and Jingqiang Lin and Dengguo Feng and Wei Wang
and Mingyu Wang and Wenjie Wang",
title = "{RegKey}: a Register-based Implementation of {ECC}
Signature Algorithms Against One-shot Memory
Disclosure",
journal = j-TECS,
volume = "22",
number = "6",
pages = "97:1--97:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3604805",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3604805",
abstract = "To ensure the security of cryptographic algorithm
implementations, several cryptographic key protection
schemes have been proposed to prevent various memory
disclosure attacks. Among them, the register-based
solutions do not rely on special hardware \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "97",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Min:2023:SBM,
author = "Chulhong Min and Akhil Mathur and Utku G{\"u}nay Acer
and Alessandro Montanari and Fahim Kawsar",
title = "{SensiX++}: Bringing {MLOps} and Multi-tenant Model
Serving to Sensory Edge Devices",
journal = j-TECS,
volume = "22",
number = "6",
pages = "98:1--98:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3617507",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3617507",
abstract = "We present SensiX++, a multi-tenant runtime for
adaptive model execution with integrated MLOps on edge
devices, e.g., a camera, a microphone, or IoT sensors.
SensiX++ operates on two fundamental principles: highly
modular componentisation to externalise \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "98",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{ElYaacoub:2023:SDS,
author = "Ahmed {El Yaacoub} and Luca Mottola and Thiemo Voigt
and Philipp R{\"u}mmer",
title = "Scheduling Dynamic Software Updates in Mobile Robots",
journal = j-TECS,
volume = "22",
number = "6",
pages = "99:1--99:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3623676",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3623676",
abstract = "We present NeRTA (Next Release Time Analysis), a
technique to enable dynamic software updates for
low-level control software of mobile robots. Dynamic
software updates enable software correction and
evolution during system operation. In mobile robotics,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "99",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Samaddar:2023:ODS,
author = "Ankita Samaddar and Arvind Easwaran",
title = "Online Distributed Schedule Randomization to Mitigate
Timing Attacks in Industrial Control Systems",
journal = j-TECS,
volume = "22",
number = "6",
pages = "100:1--100:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3624584",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3624584",
abstract = "Industrial control systems (ICSs) consist of a large
number of control applications that are associated with
periodic real-time flows with hard deadlines. To
facilitate large-scale integration, remote control, and
co-ordination, wireless sensor and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "100",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2023:SFA,
author = "Jun-Shen Wu and Tsen-Wei Hsu and Ren-Shuo Liu",
title = "{SG-Float}: Achieving Memory Access and Computing
Power Reduction Using Self-Gating Float in {CNNs}",
journal = j-TECS,
volume = "22",
number = "6",
pages = "101:1--101:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3624582",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3624582",
abstract = "Convolutional neural networks (CNNs) are essential for
advancing the field of artificial intelligence.
However, since these networks are highly demanding in
terms of memory and computation, implementing CNNs can
be challenging. To make CNNs more \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "101",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hung:2023:EEC,
author = "Chen-Tui Hung and Kai Xuan Lee and Yi-Zheng Liu and
Ya-Shu Chen and Zhong-Han Chan",
title = "Energy-Efficient Communications for Improving Timely
Progress of Intermittent-Powered {BLE} Devices",
journal = j-TECS,
volume = "22",
number = "6",
pages = "102:1--102:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3626197",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3626197",
abstract = "Battery-less devices offer potential solutions for
maintaining sustainable Internet of Things (IoT)
networks. However, limited energy harvesting capacity
can lead to power failures, limiting the system's
quality of service (QoS). To improve timely task
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "102",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Javadi:2023:CME,
author = "Mohammad Haji Seyed Javadi and Mohsen Faryabi and
Hamid Reza Mahdiani",
title = "A Comprehensive Model for Efficient Design Space
Exploration of Imprecise Computational Blocks",
journal = j-TECS,
volume = "22",
number = "6",
pages = "103:1--103:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3625555",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3625555",
abstract = "After almost a decade of research, development of more
efficient imprecise computational blocks is still a
major concern in imprecise computing domain. There are
many instances of the introduced imprecise components
of different types, while their main \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "103",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Siddhu:2023:DTM,
author = "Lokesh Siddhu and Aritra Bagchi and Rajesh Kedia and
Isaar Ahmad and Shailja Pandey and Preeti Ranjan
Panda",
title = "Dynamic Thermal Management of {$3$D} Memory through
Rotating Low Power States and Partial Channel Closure",
journal = j-TECS,
volume = "22",
number = "6",
pages = "104:1--104:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3624581",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3624581",
abstract = "Modern high-performance and high-bandwidth
three-dimensional (3D) memories are characterized by
frequent heating. Prior art suggests turning off hot
channels and migrating data to the background DDR
memory, incurring significant performance and energy
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "104",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2023:EBN,
author = "Erwei Wang and James J. Davis and Daniele Moro and
Piotr Zielinski and Jia Jie Lim and Claudionor Coelho
and Satrajit Chatterjee and Peter Y. K. Cheung and
George A. Constantinides",
title = "Enabling Binary Neural Network Training on the Edge",
journal = j-TECS,
volume = "22",
number = "6",
pages = "105:1--105:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3626100",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3626100",
abstract = "The ever-growing computational demands of increasingly
complex machine learning models frequently necessitate
the use of powerful cloud-based infrastructure for
their training. Binary neural networks are known to be
promising candidates for on-device \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "105",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Farahmand:2023:DAH,
author = "Ebrahim Farahmand and Ali Mahani and Muhammad Abdullah
Hanif and Muhammad Shafique",
title = "Design and Analysis of High Performance Heterogeneous
Block-based Approximate Adders",
journal = j-TECS,
volume = "22",
number = "6",
pages = "106:1--106:??",
month = nov,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3625686",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:46 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3625686",
abstract = "Approximate computing is an emerging paradigm to
improve the power and performance efficiency of
error-resilient applications. As adders are one of the
key components in almost all processing systems, a
significant amount of research has been carried out
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "106",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Casini:2024:ISI,
author = "Daniel Casini and Dakshina Dasari and Matthias Becker
and Giorgio Buttazzo",
title = "Introduction to the Special Issue on Real-Time
Computing in the {IoT}-to-Edge-to-Cloud Continuum",
journal = j-TECS,
volume = "23",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3605180",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3605180",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chen:2024:DTO,
author = "Ying Chen and Jie Zhao and Jintao Hu and Shaohua Wan
and Jiwei Huang",
title = "Distributed Task Offloading and Resource Purchasing in
{NOMA-Enabled} Mobile Edge Computing: Hierarchical Game
Theoretical Approaches",
journal = j-TECS,
volume = "23",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597023",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3597023",
abstract = "As the computing resources and the battery capacity of
mobile devices are usually limited, it is a feasible
solution to offload the computation-intensive tasks
generated by mobile devices to edge servers (ESs) in
mobile edge computing (MEC). In this \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cucinotta:2024:MCO,
author = "Tommaso Cucinotta and Alexandre Amory and Gabriele Ara
and Francesco Paladino and Marco {Di Natale}",
title = "Multi-criteria Optimization of Real-time {DAGs} on
Heterogeneous Platforms under {P-EDF}",
journal = j-TECS,
volume = "23",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592609",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3592609",
abstract = "This article tackles the problem of optimal placement
of complex real-time embedded applications on
heterogeneous platforms. Applications are composed of
directed acyclic graphs of tasks, with each
directed-acyclic-graph (DAG) having a minimum inter-arrival
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Struhar:2024:HRO,
author = "V{\'a}clav Struh{\'a}r and Silviu S. Craciunas and
Mohammad Ashjaei and Moris Behnam and Alessandro V.
Papadopoulos",
title = "Hierarchical Resource Orchestration Framework for
Real-time Containers",
journal = j-TECS,
volume = "23",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592856",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
URL = "https://dl.acm.org/doi/10.1145/3592856",
abstract = "Container-based virtualization is a promising
deployment model in fog and edge computing
applications, because it allows a seamless co-existence
of virtualized applications in a heterogeneous
environment without introducing significant overhead.
Certain \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Barletta:2024:CAM,
author = "Marco Barletta and Marcello Cinque and Luigi {De
Simone} and Raffaele {Della Corte}",
title = "Criticality-aware Monitoring and Orchestration for
Containerized Industry 4.0 Environments",
journal = j-TECS,
volume = "23",
number = "1",
pages = "5:1--5:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3604567",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3604567",
abstract = "The evolution of industrial environments makes the
reconfigurability and flexibility key requirements to
rapidly adapt to changeable market needs. Computing
paradigms like Edge/Fog computing are able to provide
the required flexibility and scalability \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ghosh:2024:PPA,
author = "Soumendu Kumar Ghosh and Arnab Raha and Vijay
Raghunathan and Anand Raghunathan",
title = "{PArtNNer}: Platform-Agnostic Adaptive Edge-Cloud
{DNN} Partitioning for Minimizing End-to-End Latency",
journal = j-TECS,
volume = "23",
number = "1",
pages = "6:1--6:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3630266",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3630266",
abstract = "The last decade has seen the emergence of Deep Neural
Networks (DNNs) as the de facto algorithm for various
computer vision applications. In intelligent edge
devices, sensor data streams acquired by the device are
processed by a DNN application running on \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "6",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xu:2024:SLB,
author = "Haitao Xu and Saiyu Qi and Yong Qi and Wei Wei and
Naixue Xiong",
title = "Secure and Lightweight Blockchain-based Truthful Data
Trading for Real-Time Vehicular Crowdsensing",
journal = j-TECS,
volume = "23",
number = "1",
pages = "7:1--7:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3582008",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3582008",
abstract = "As the number of smart cars grows rapidly, vehicular
crowdsensing (VCS) is gradually becoming popular. In a
VCS infrastructure, sensing devices and computing units
hold on smart cars as well as cloud servers form an
IoT-edge-cloud continuum to perform \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "7",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Oza:2024:DAT,
author = "Pratham Oza and Nathaniel Hudson and Thidapat Chantem
and Hana Khamfroush",
title = "Deadline-Aware Task Offloading for Vehicular Edge
Computing Networks Using Traffic Light Data",
journal = j-TECS,
volume = "23",
number = "1",
pages = "8:1--8:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3594541",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3594541",
abstract = "As vehicles have become increasingly automated, novel
vehicular applications have emerged to enhance the
safety and security of the vehicles and improve user
experience. This brings ever-increasing data and
resource requirements for timely computation by
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "8",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gaitan:2024:MOC,
author = "Miguel Guti{\'e}rrez Gait{\'a}n and Lu{\'\i}s Almeida
and Pedro M. {d'Orey} and Pedro M. Santos and Thomas
Watteyne",
title = "Minimal-Overlap Centrality for Multi-Gateway
Designation in Real-Time {TSCH} Networks",
journal = j-TECS,
volume = "23",
number = "1",
pages = "9:1--9:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3610583",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3610583",
abstract = "This article presents a novel centrality-driven
gateway designation framework for the improved
real-time performance of low-power wireless sensor
networks (WSNs) at system design time. We target
time-synchronized channel hopping (TSCH) WSNs with
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "9",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Iyer:2024:HCM,
author = "Vishnuvardhan V. Iyer and Aditya Thimmaiah and Michael
Orshansky and Andreas Gerstlauer and Ali E. Yilmaz",
title = "A Hierarchical Classification Method for High-accuracy
Instruction Disassembly with Near-field {EM}
Measurements",
journal = j-TECS,
volume = "23",
number = "1",
pages = "10:1--10:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3629167",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3629167",
abstract = "Electromagnetic (EM) fields have been extensively
studied as potent side-channel tools for testing the
security of hardware implementations. In this work, a
low-cost side-channel disassembler that uses
fine-grained EM signals to predict a program's
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "10",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2024:EAA,
author = "Yi-Wen Zhang and Hui Zheng and Zonghua Gu",
title = "Energy-Aware Adaptive Mixed-Criticality Scheduling
with Semi-Clairvoyance and Graceful Degradation",
journal = j-TECS,
volume = "23",
number = "1",
pages = "11:1--11:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3632749",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3632749",
abstract = "The classic Mixed-Criticality System (MCS) task model
is a non-clairvoyance model in which the change of the
system behavior is based on the completion of
high-criticality tasks while dropping low-criticality
tasks in high-criticality mode. In this paper,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "11",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bagchi:2024:CCA,
author = "Aritra Bagchi and Dinesh Joshi and Preeti Ranjan
Panda",
title = "{COBRRA}: {COntention-aware} cache Bypass with
Request-Response Arbitration",
journal = j-TECS,
volume = "23",
number = "1",
pages = "12:1--12:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3632748",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3632748",
abstract = "In modern multi-processor systems-on-chip (MPSoCs),
requests from different processor cores, accelerators,
and their responses from the lower-level memory contend
for the shared cache bandwidth, making it a critical
performance bottleneck. Prior research \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "12",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shin:2024:VEM,
author = "Yong-Jun Shin and Donghwan Shin and Doo-Hwan Bae",
title = "Virtual Environment Model Generation for {CPS} Goal
Verification using Imitation Learning",
journal = j-TECS,
volume = "23",
number = "1",
pages = "13:1--13:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3633804",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3633804",
abstract = "Cyber-Physical Systems (CPS) continuously interact
with their physical environments through embedded
software controllers that observe the environments and
determine actions. Field Operational Tests (FOT) are
essential to verify to what extent the CPS \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "13",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Yu:2024:MAE,
author = "Wangyang Yu and Jinming Kong and Zhijun Ding and
Xiaojun Zhai and Zhiqiang Li and Qi Guo",
title = "Modeling and Analysis of {ETC} Control System with
Colored {Petri} Net and Dynamic Slicing",
journal = j-TECS,
volume = "23",
number = "1",
pages = "14:1--14:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3633450",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3633450",
abstract = "Nowadays, Electronic Toll Collection (ETC) control
systems have been widely adopted to smoothen traffic
flow on highways. However, as it is a complex business
interaction system, there are inevitably flaws in its
control logic process, such as the problem \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "14",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{He:2024:REA,
author = "Zhijian He and Bohuan Xue and Xiangcheng Hu and
Zhaoyan Shen and Xiangyue Zeng and Ming Liu",
title = "Robust Embedded Autonomous Driving Positioning System
Fusing {LiDAR} and Inertial Sensors",
journal = j-TECS,
volume = "23",
number = "1",
pages = "15:1--15:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3626098",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3626098",
abstract = "Autonomous driving emphasizes precise multi-sensor
fusion positioning on limit resource embedded systems.
LiDAR-centered sensor fusion system serves as a
mainstream navigation system due to its insensitivity
to illumination and viewpoint change. However,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "15",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Qi:2024:MCS,
author = "Huamei Qi and Fang Ren and Leilei Wang and Ping Jiang
and Shaohua Wan and Xiaoheng Deng",
title = "Multi-Compression Scale {DNN} Inference Acceleration
based on Cloud-Edge-End Collaboration",
journal = j-TECS,
volume = "23",
number = "1",
pages = "16:1--16:??",
month = jan,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3634704",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Sat Feb 3 11:10:48 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3634704",
abstract = "Edge intelligence has emerged as a promising paradigm
to accelerate DNN inference by model partitioning,
which is particularly useful for intelligent scenarios
that demand high accuracy and low latency. However, the
dynamic nature of the edge environment \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "16",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Que:2024:LGL,
author = "Zhiqiang Que and Hongxiang Fan and Marcus Loo and He
Li and Michaela Blott and Maurizio Pierini and
Alexander Tapper and Wayne Luk",
title = "{LL-GNN}: Low Latency Graph Neural Networks on {FPGAs}
for High Energy Physics",
journal = j-TECS,
volume = "23",
number = "2",
pages = "17:1--17:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3640464",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3640464",
abstract = "This work presents a novel reconfigurable architecture
for Low Latency Graph Neural Network (LL-GNN) designs
for particle detectors, delivering unprecedented low
latency performance. Incorporating FPGA-based GNNs into
particle detectors presents a unique \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "17",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Alsubhi:2024:SFE,
author = "Arwa Alsubhi and Simeon Babatunde and Nicole Tobias
and Jacob Sorber",
title = "{Stash}: Flexible Energy Storage for Intermittent
Sensors",
journal = j-TECS,
volume = "23",
number = "2",
pages = "18:1--18:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3641511",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3641511",
abstract = "Batteryless sensors promise a sustainable future for
sensing, but they face significant challenges when
storing and using environmental energy. Incoming energy
can fluctuate unpredictably between periods of scarcity
and abundance, and device performance \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "18",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhao:2024:ICV,
author = "Liang Zhao and Hongxuan Li and Enchao Zhang and Ammar
Hawbani and Mingwei Lin and Shaohua Wan and Mohsen
Guizani",
title = "Intelligent Caching for Vehicular Dew Computing in
Poor Network Connectivity Environments",
journal = j-TECS,
volume = "23",
number = "2",
pages = "19:1--19:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3643038",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3643038",
abstract = "In vehicular networks, some edge servers may not
function properly due to the time-varying load
condition and the uneven computing resource
distribution, resulting in a low quality of caching
services. To overcome this challenge, we develop a
Vehicular \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "19",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sah:2024:ATE,
author = "Ramesh Kumar Sah and Hassan Ghasemzadeh",
title = "Adversarial Transferability in Embedded Sensor
Systems: an Activity Recognition Perspective",
journal = j-TECS,
volume = "23",
number = "2",
pages = "20:1--20:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3641861",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3641861",
abstract = "Machine learning algorithms are increasingly used for
inference and decision-making in embedded systems. Data
from sensors are used to train machine learning models
for various smart functions of embedded and
cyber-physical systems ranging from \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "20",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kornaros:2024:FUI,
author = "George Kornaros and Svoronos Leivadaros and Filippos
Kolimbianakis",
title = "Flexible Updating of {Internet} of Things Computing
Functions through Optimizing Dynamic Partial
Reconfiguration",
journal = j-TECS,
volume = "23",
number = "2",
pages = "21:1--21:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3643825",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3643825",
abstract = "With applications to become increasingly compute- and
data-intensive, requiring more processing power, many
Internet of Things (IoT) platforms in robots, drones,
and autonomous vehicles that implement neural network
inference, cryptographic functions or \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "21",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Fatnassi:2024:PNN,
author = "Wael Fatnassi and Yasser Shoukry",
title = "{PolyARBerNN}: a Neural Network Guided Solver and
Optimizer for Bounded Polynomial Inequalities",
journal = j-TECS,
volume = "23",
number = "2",
pages = "22:1--22:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3632970",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3632970",
abstract = "Constraints solvers play a significant role in the
analysis, synthesis, and formal verification of complex
cyber-physical systems. In this article, we study the
problem of designing a scalable constraints solver for
an important class of constraints named \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "22",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2024:CIS,
author = "Lu Li and Qi Tian and Guofeng Qin and Shuaiyu Chen and
Weijia Wang",
title = "Compact Instruction Set Extensions for {Dilithium}",
journal = j-TECS,
volume = "23",
number = "2",
pages = "23:1--23:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3643826",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3643826",
abstract = "Post-quantum cryptography is considered to provide
security against both traditional and quantum computer
attacks. Dilithium is a digital signature algorithm
that derives its security from the challenge of finding
short vectors in lattices. It has been \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "23",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2024:SGC,
author = "Chin-Hsien Wu and Cheng-Tze Lee and Yi-Ren Tsai and
Cheng-Yen Wu",
title = "A Space-Grained Cleaning Method to Reduce Long-Tail
Latency of {DM-SMR} Disks",
journal = j-TECS,
volume = "23",
number = "2",
pages = "24:1--24:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3643827",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3643827",
abstract = "DM-SMR (device-managed shingled magnetic recording)
disks allocate a portion of disk space as the
persistent cache (PC) to address the issue of
overlapping tracks during data updates. When the PC
space becomes insufficient, a space cleaning is
triggered \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "24",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Deng:2024:SST,
author = "Jianing Deng and Shunjie Dong and Lvcheng Chen and
Jingtong Hu and Cheng Zhuo",
title = "{STDF}: Spatio-Temporal Deformable Fusion for Video
Quality Enhancement on Embedded Platforms",
journal = j-TECS,
volume = "23",
number = "2",
pages = "25:1--25:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3645113",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3645113",
abstract = "With the development of embedded systems and deep
learning, it is feasible to combine them for offering
various and convenient human-centered services, which
is based on high-quality (HQ) videos. However, due to
the limit of video traffic load and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bhasin:2024:SIP,
author = "Shivam Bhasin and Fabrizio {De Santis} and Francesco
Regazzoni",
title = "Special Issue on Post-Quantum Cryptography for
Embedded Systems",
journal = j-TECS,
volume = "23",
number = "2",
pages = "26:1--26:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3641852",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3641852",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "26",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mujdei:2024:SCA,
author = "Catinca Mujdei and Lennert Wouters and Angshuman
Karmakar and Arthur Beckers and Jose Maria {Bermudo Mera}
and Ingrid Verbauwhede",
title = "Side-channel Analysis of Lattice-based Post-quantum
Cryptography: Exploiting Polynomial Multiplication",
journal = j-TECS,
volume = "23",
number = "2",
pages = "27:1--27:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3569420",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3569420",
abstract = "Polynomial multiplication algorithms such as
Toom--Cook and the Number Theoretic Transform are
fundamental building blocks for lattice-based
post-quantum cryptography. In this work we present
correlation power-analysis-based side-channel analysis
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "27",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gupta:2024:MEE,
author = "Saransh Gupta and Rosario Cammarota and Tajana
Simuni{\'c}",
title = "{MemFHE}: End-to-end Computing with Fully Homomorphic
Encryption in Memory",
journal = j-TECS,
volume = "23",
number = "2",
pages = "28:1--28:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3569955",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3569955",
abstract = "The increasing amount of data and the growing
complexity of problems have resulted in an ever-growing
reliance on cloud computing. However, many
applications, most notably in healthcare, finance, or
defense, demand security and privacy, which today's
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "28",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Thoma:2024:AAS,
author = "Jan Philipp Thoma and Darius Hartlief and Tim
G{\"u}neysu",
title = "Agile Acceleration of Stateful Hash-based Signatures
in Hardware",
journal = j-TECS,
volume = "23",
number = "2",
pages = "29:1--29:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3567426",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3567426",
abstract = "With the development of large-scale quantum computers,
the current landscape of asymmetric cryptographic
algorithms will change dramatically. Today's standards
like RSA, DSA, and ElGamal will no longer provide
sufficient security against quantum attackers
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "29",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Karl:2024:PQS,
author = "Patrick Karl and Jonas Schupp and Tim Fritzmann and
Georg Sigl",
title = "Post-Quantum Signatures on {RISC-V} with Hardware
Acceleration",
journal = j-TECS,
volume = "23",
number = "2",
pages = "30:1--30:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3579092",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/risc-v.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3579092",
abstract = "CRYSTALS-Dilithium and Falcon are digital signature
algorithms based on cryptographic lattices, which are
considered secure even if large-scale quantum computers
will be able to break conventional public-key
cryptography. Both schemes have been selected
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "30",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Elkhatib:2024:CEF,
author = "Rami Elkhatib and Brian Koziel and Reza Azarderakhsh
and Mehran {Mozaffari Kermani}",
title = "Cryptographic Engineering a Fast and Efficient {SIKE}
in {FPGA}",
journal = j-TECS,
volume = "23",
number = "2",
pages = "31:1--31:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3584919",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3584919",
abstract = "Recent attacks have shown that SIKE is not secure and
should not be used in its current state. However, this
work was completed before these attacks were discovered
and might be beneficial to other cryptosystems such as
SQISign. The primary downside of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "31",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Singh:2024:AEF,
author = "Richa Singh and Saad Islam and Berk Sunar and Patrick
Schaumont",
title = "Analysis of {EM} Fault Injection on Bit-sliced Number
Theoretic Transform Software in {Dilithium}",
journal = j-TECS,
volume = "23",
number = "2",
pages = "32:1--32:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3583757",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3583757",
abstract = "Bitslicing is a software implementation technique that
treats an $N$-bit processor datapath as $N$ parallel
single-bit datapaths. Bitslicing is particularly useful
to implement data-parallel algorithms, algorithms that
apply the same operation sequence to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "32",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jati:2024:CCK,
author = "Arpan Jati and Naina Gupta and Anupam Chattopadhyay
and Somitra Kumar Sanadhya",
title = "A Configurable {CRYSTALS--Kyber} Hardware
Implementation with Side-Channel Protection",
journal = j-TECS,
volume = "23",
number = "2",
pages = "33:1--33:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3587037",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3587037",
abstract = "In this work, we present a configurable and side
channel resistant implementation of the post-quantum
key-exchange algorithm CRYSTALS-Kyber. The implemented
design can be configured for different performance and
area requirements leading to different \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "33",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Alnahawi:2024:TNG,
author = "Nouri Alnahawi and Nicolai Schmitt and Alexander
Wiesmaier and Chiara-Marie Zok",
title = "Toward Next Generation Quantum-Safe {eIDs} and
{eMRTDs}: a Survey",
journal = j-TECS,
volume = "23",
number = "2",
pages = "34:1--34:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3585517",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3585517",
abstract = "Security mechanisms of Electronic Personal Documents
(eCards) depend on (asymmetric) cryptography that is
and always has been subject to the threat of
compromise, be it from conventional attacks or quantum
computers. With Post-Quantum Cryptography (PQC),
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "34",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ravi:2024:SCF,
author = "Prasanna Ravi and Anupam Chattopadhyay and Jan Pieter
D'Anvers and Anubhab Baksi",
title = "Side-channel and Fault-injection attacks over
Lattice-based Post-quantum Schemes ({Kyber},
{Dilithium}): Survey and New Results",
journal = j-TECS,
volume = "23",
number = "2",
pages = "35:1--35:??",
month = mar,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603170",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Apr 10 08:49:11 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3603170",
abstract = "In this work, we present a systematic study of
Side-Channel Attacks (SCA) and Fault Injection Attacks
(FIA) on structured lattice-based schemes, with main
focus on Kyber Key Encapsulation Mechanism (KEM) and
Dilithium signature scheme, which are leading
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "35",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Niu:2024:EMF,
author = "Linwei Niu and Danda B. Rawat and Dakai Zhu and
Jonathan Musselwhite and Zonghua Gu and Qingxu Deng",
title = "Energy Management for Fault-tolerant $ (m,
k)$-constrained Real-time Systems That Use
Standby-Sparing",
journal = j-TECS,
volume = "23",
number = "3",
pages = "36:1--36:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3648365",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3648365",
abstract = "Fault tolerance, energy management, and quality of
service (QoS) are essential aspects for the design of
real-time embedded systems. In this work, we focus on
exploring methods that can simultaneously address the
above three critical issues under standby-sparing \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "36",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2024:TEE,
author = "Yueting Li and Xueyan Wang and He Zhang and Biao Pan
and Keni Qiu and Wang Kang and Jun Wang and Weisheng
Zhao",
title = "Toward Energy-efficient {STT-MRAM}-based Near Memory
Computing Architecture for Embedded Systems",
journal = j-TECS,
volume = "23",
number = "3",
pages = "37:1--37:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3650729",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3650729",
abstract = "Convolutional Neural Networks (CNNs) have
significantly impacted embedded system applications
across various domains. However, this exacerbates the
real-time processing and hardware resource-constrained
challenges of embedded systems. To tackle these
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "37",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gealy:2024:CPS,
author = "Calvin B. Gealy and Alan D. George",
title = "Characterizing Parameter Scaling with Quantization for
Deployment of {CNNs} on Real-Time Systems",
journal = j-TECS,
volume = "23",
number = "3",
pages = "38:1--38:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3654799",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3654799",
abstract = "Modern deep-learning models tend to include billions
of parameters, reducing real-time performance. Embedded
systems are compute-constrained while frequently used
to deploy these models for real-time systems given
size, weight, and power requirements. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "38",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Salmon:2024:NCA,
author = "Loic Salmon and Pierre-Yves Pillain and Goulven
Guillou and Jean-Philippe Babau",
title = "{NAVIDRO}, a {CARES} architectural style for
configuring drone co-simulation",
journal = j-TECS,
volume = "23",
number = "3",
pages = "39:1--39:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3651889",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3651889",
abstract = "One primary objective of drone simulation is to
evaluate diverse drone configurations and contexts
aligned with specific user objectives. The initial
challenge for simulator designers involves managing the
heterogeneity of drone components, encompassing
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "39",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Theocharides:2024:ISI,
author = "Theocharis Theocharides and Charlotte Frenkel and
Lukas Cavigelli",
title = "Introduction to the Special Issue on {tinyML}",
journal = j-TECS,
volume = "23",
number = "3",
pages = "40:1--40:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3658375",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3658375",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "40",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lu:2024:EEE,
author = "Qianyun Lu and Boris Murmann",
title = "Enhancing the Energy Efficiency and Robustness of
{tinyML} Computer Vision Using Coarsely-quantized
Log-gradient Input Images",
journal = j-TECS,
volume = "23",
number = "3",
pages = "41:1--41:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3591466",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3591466",
abstract = "This article studies the merits of applying
log-gradient input images to convolutional neural
networks (CNNs) for tinyML computer vision (CV). We
show that log gradients enable: (i) aggressive 1-bit
quantization of first-layer inputs, (ii) potential CNN
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "41",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pavan:2024:TAD,
author = "Massimo Pavan and Eugeniu Ostrovan and Armando
Caltabiano and Manuel Roveri",
title = "{TyBox}: an Automatic Design and Code Generation
Toolbox for {TinyML} Incremental On-Device Learning",
journal = j-TECS,
volume = "23",
number = "3",
pages = "42:1--42:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3604566",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3604566",
abstract = "Incremental on-device learning is one of the most
relevant and interesting challenges in the field of
Tiny Machine Learning (TinyML). Indeed, differently
from traditional TinyML solutions where the training is
typically carried out on the Cloud and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "42",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Saha:2024:TPA,
author = "Swapnil Sayan Saha and Sandeep Singh Sandha and Mohit
Aggarwal and Brian Wang and Liying Han and Julian {De
Gortari Briseno} and Mani Srivastava",
title = "{TinyNS}: Platform-aware Neurosymbolic Auto Tiny
Machine Learning",
journal = j-TECS,
volume = "23",
number = "3",
pages = "43:1--43:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603171",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3603171",
abstract = "Machine learning at the extreme edge has enabled a
plethora of intelligent, time-critical, and remote
applications. However, deploying interpretable
artificial intelligence systems that can perform
high-level symbolic reasoning and satisfy the
underlying \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "43",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mazumder:2024:RTR,
author = "Arnab Neelim Mazumder and Farshad Safavi and Maryam
Rahnemoonfar and Tinoosh Mohsenin",
title = "{Reg-Tune}: a Regression-Focused Fine-Tuning Approach
for Profiling Low Energy Consumption and Latency",
journal = j-TECS,
volume = "23",
number = "3",
pages = "44:1--44:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3623380",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3623380",
abstract = "Fine-tuning deep neural networks is pivotal for
creating inference modules that can be suitably
imported to edge or field-programmable gate array
(FPGA) platforms. Traditionally, exploration of
different parameters throughout the layers of deep
neural \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "44",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhang:2024:SBN,
author = "Xinqiao Zhang and Mohammad Samragh and Siam Hussain
and Ke Huang and Farinaz Koushanfar",
title = "Scalable Binary Neural Network Applications in
Oblivious Inference",
journal = j-TECS,
volume = "23",
number = "3",
pages = "45:1--45:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607192",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607192",
abstract = "Binary neural network (BNN) delivers increased compute
intensity and reduces memory/data requirements for
computation. Scalable BNN enables inference in a
limited time due to different constraints. This paper
explores the application of Scalable BNN in \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "45",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sridhar:2024:SSR,
author = "Upasana Sridhar and Nicholai Tukanov and Elliott
Binder and Tze Meng Low and Scott McMillan and Martin
D. Schatz",
title = "{SMaLL}: Software for Rapidly Instantiating Machine
Learning Libraries",
journal = j-TECS,
volume = "23",
number = "3",
pages = "46:1--46:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607870",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607870",
abstract = "Interest in deploying deep neural network (DNN)
inference on edge devices has resulted in an explosion
of the number and types of hardware platforms that
machine learning (ML) libraries must support.
High-level programming interfaces, such as TensorFlow,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "46",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Rashid:2024:TNV,
author = "Hasib-Al Rashid and Utteja Kallakuri and Tinoosh
Mohsenin",
title = "{TinyM$^2$Net-V2}: a Compact Low-power Software
Hardware Architecture for Multimodal Deep Neural
Networks",
journal = j-TECS,
volume = "23",
number = "3",
pages = "47:1--47:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3595633",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3595633",
abstract = "With the evaluation of Artificial Intelligence (AI),
there has been a resurgence of interest in how to use
AI algorithms on low-power embedded systems to broaden
potential use cases of the Internet of Things (IoT). To
mimic multimodal human perception, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "47",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Cilardo:2024:ASC,
author = "Alessandro Cilardo and Vincenzo Maisto and Nicola
Mazzocca and Franca Rocco {Di Torrepadula}",
title = "An Approach to the Systematic Characterization of
Multitask Accelerated {CNN} Inference in Edge
{MPSoCs}",
journal = j-TECS,
volume = "23",
number = "3",
pages = "48:1--48:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3611015",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3611015",
abstract = "Deep Learning is ubiquitous today and is increasingly
moving from the cloud down to the edge of networked
infrastructures, where it enables embedded applications
to perform complex inference tasks close to the data
sources, reducing long-distance data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "48",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ancilotto:2024:XMM,
author = "Alberto Ancilotto and Francesco Paissan and Elisabetta
Farella",
title = "{XimSwap}: Many-to-Many Face Swapping for {TinyML}",
journal = j-TECS,
volume = "23",
number = "3",
pages = "49:1--49:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603173",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3603173",
abstract = "The unprecedented development of deep learning
approaches for video processing has caused growing
privacy concerns. To ensure data analysis while
maintaining privacy, it is essential to address how to
protect individuals' identities. One solution is to
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "49",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Andrade:2024:OPV,
author = "Pedro Andrade and Ivanovitch Silva and Marianne Diniz
and Thommas Flores and Daniel G. Costa and Eduardo
Soares",
title = "Online Processing of Vehicular Data on the Edge
Through an Unsupervised {TinyML} Regression Technique",
journal = j-TECS,
volume = "23",
number = "3",
pages = "50:1--50:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3591356",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3591356",
abstract = "The Internet of Things (IoT) has made it possible to
include everyday objects in a connected network,
allowing them to intelligently process data and respond
to their environment. Thus, it is expected that those
objects will gain an intelligent \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "50",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{ElAdoui:2024:CTM,
author = "Mohammed {El Adoui} and Thomas Herpoel and Beno{\^\i}t
Fr{\'e}nay",
title = "Constrained Tiny Machine Learning for Predicting Gas
Concentration with {I4.0} Low-cost Sensors",
journal = j-TECS,
volume = "23",
number = "3",
pages = "51:1--51:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3590956",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3590956",
abstract = "Low-cost gas sensors (LCS) often produce inaccurate
measurements due to varying environmental conditions
that are not consistent with laboratory settings,
leading to inadequate productivity levels compared to
high-quality sensors. To address this issue, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "51",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2024:IHA,
author = "Chaojian Li and Kyungmin Kim and Bichen Wu and Peizhao
Zhang and Hang Zhang and Xiaoliang Dai and Peter Vajda
and Yingyan (Celine) Lin",
title = "An Investigation on Hardware-Aware Vision Transformer
Scaling",
journal = j-TECS,
volume = "23",
number = "3",
pages = "52:1--52:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3611387",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3611387",
abstract = "Vision Transformer (ViT) has demonstrated promising
performance in various computer vision tasks, and
recently attracted a lot of research attention. Many
recent works have focused on proposing new
architectures to improve ViT and deploying it into
real-world \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "52",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hussein:2024:SND,
author = "Dina Hussein and Ganapati Bhat",
title = "{SensorGAN}: a Novel Data Recovery Approach for
Wearable Human Activity Recognition",
journal = j-TECS,
volume = "23",
number = "3",
pages = "53:1--53:??",
month = may,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3609425",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Tue May 14 06:30:25 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3609425",
abstract = "Human activity recognition (HAR) and, more broadly,
activities of daily life recognition using wearable
devices have the potential to transform a number of
applications, including mobile healthcare, smart homes,
and fitness monitoring. Recent approaches \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "53",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hoag:2024:RFM,
author = "Ellis Hoag and Kyungwoo Lee and Julian Mestre and
Sergey Pupyrev and Yongkang Zhu",
title = "Reordering Functions in Mobiles Apps for Reduced Size
and Faster Start-Up",
journal = j-TECS,
volume = "23",
number = "4",
pages = "54:1--54:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3660635",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3660635",
abstract = "Function layout, also known as function reordering or
function placement, is one of the most effective
profile-guided compiler optimizations. By reordering
functions in a binary, compilers can improve the
performance of large-scale applications or reduce
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "54",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ren:2024:DOL,
author = "Haoyu Ren and Darko Anicic and Xue Li and Thomas
Runkler",
title = "On-device Online Learning and Semantic Management of
{TinyML} Systems",
journal = j-TECS,
volume = "23",
number = "4",
pages = "55:1--55:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3665278",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3665278",
abstract = "Recent advances in Tiny Machine Learning (TinyML)
empower low-footprint embedded devices for real-time
on-device Machine Learning (ML). While many acknowledge
the potential benefits of TinyML, its practical
implementation presents unique challenges. This
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "55",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bhade:2024:LHB,
author = "Pavitra Bhade and Joseph Paturel and Olivier Sentieys
and Sharad Sinha",
title = "Lightweight Hardware-Based Cache Side-Channel Attack
Detection for Edge Devices ({Edge-CaSCADe})",
journal = j-TECS,
volume = "23",
number = "4",
pages = "56:1--56:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3663673",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3663673",
abstract = "Cache Side-Channel Attacks (CSCAs) have been haunting
most processor architectures for decades now. Existing
approaches to mitigation of such attacks have certain
drawbacks, namely software mishandling, performance
overhead, and low throughput due to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "56",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chielle:2024:CBM,
author = "Eduardo Chielle and Oleg Mazonka and Homer Gamil and
Michail Maniatakos",
title = "Coupling bit and modular arithmetic for efficient
general-purpose fully homomorphic encryption",
journal = j-TECS,
volume = "23",
number = "4",
pages = "57:1--57:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3665280",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3665280",
abstract = "Fully Homomorphic Encryption (FHE) enables computation
directly on encrypted data. This property is desirable
for outsourced computation of sensitive data as it
relies solely on the underlying security of the
cryptosystem and not in access control \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "57",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Boudardara:2024:RAM,
author = "Fateh Boudardara and Abderraouf Boussif and
Pierre-Jean Meyer and Mohamed Ghazel",
title = "A Review of Abstraction Methods Toward Verifying
Neural Networks",
journal = j-TECS,
volume = "23",
number = "4",
pages = "58:1--58:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3617508",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3617508",
abstract = "Neural networks as a machine learning technique are
increasingly deployed in various domains. Despite their
performance and their continuous improvement, the
deployment of neural networks in safety-critical
systems, in particular for autonomous mobility,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "58",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ulus:2024:ETP,
author = "Dogan Ulus and Thomas Ferr{\`e}re and Eugene Asarin
and Dejan Nickovic and Oded Maler",
title = "Elements of Timed Pattern Matching",
journal = j-TECS,
volume = "23",
number = "4",
pages = "59:1--59:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3645114",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3645114",
abstract = "The rise of machine learning and cloud technologies
has led to a remarkable influx of data within modern
cyber-physical systems. However, extracting meaningful
information from this data has become a significant
challenge due to its volume and complexity. Timed
pattern matching has emerged as a powerful
specification-based runtime verification and temporal
data analysis technique to address this
challenge.\par
In this paper, we provide a comprehensive tutorial on
timed pattern matching that ranges from the underlying
algebra and pattern specification languages to
performance analyses and practical case studies.
Analogous to textual pattern matching, timed pattern
matching is the task of finding all time periods within
temporal behaviors of cyber-physical systems that match
a predefined pattern. Originally we introduced and
solved several variants of the problem using the name
of match sets, which has evolved into the concept of
timed relations over the past decade. Here we first
formalize and present the algebra of timed relations as
a standalone mathematical tool to solve the pattern
matching problem of timed pattern specifications. In
particular, we show how to use the algebra of timed
relations to solve the pattern matching problem for
timed regular expressions and metric compass logic in a
unified manner. We experimentally demonstrate that our
timed pattern matching approach performs and scales
well in practice. We further provide in-depth insights
into the similarities and fundamental differences
between monitoring and matching problems as well as
regular expressions and temporal logic formulas.
Finally, we illustrate the practical application of
timed pattern matching through two case studies, which
show how to extract structured information from
temporal datasets obtained via simulations or
real-world observations. These results and examples
show that timed pattern matching is a rigorous and
efficient technique in developing and analyzing
cyber-physical systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "59",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Panopoulos:2024:CCA,
author = "Ioannis Panopoulos and Stylianos Venieris and Iakovos
Venieris",
title = "{CARIn}: Constraint-Aware and Responsive Inference on
Heterogeneous Devices for Single- and Multi-{DNN}
Workloads",
journal = j-TECS,
volume = "23",
number = "4",
pages = "60:1--60:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3665868",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3665868",
abstract = "The relentless expansion of deep learning applications
in recent years has prompted a pivotal shift toward
on-device execution, driven by the urgent need for
real-time processing, heightened privacy concerns, and
reduced latency across diverse domains. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "60",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Chakraborty:2024:TTA,
author = "Shounak Chakraborty and Yanshul Sharma and Sanjay
Moulik",
title = "{TREAFET}: Temperature-Aware Real-Time Task Scheduling
for {FinFET} based Multicores",
journal = j-TECS,
volume = "23",
number = "4",
pages = "61:1--61:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3665276",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3665276",
abstract = "The recent shift in the VLSI industry from
conventional MOSFET to FinFET for designing
contemporary chip-multiprocessor (CMP) has noticeably
improved hardware platforms' computing capabilities,
but at the cost of several thermal issues. Unlike the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "61",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Algahtani:2024:HAA,
author = "Eyad Algahtani",
title = "A Hardware Approach For Accelerating Inductive
Learning In Description Logic",
journal = j-TECS,
volume = "23",
number = "4",
pages = "62:1--62:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3665277",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3665277",
abstract = "The employment of Machine Learning (ML) techniques in
embedded systems has seen constant growth in recent
years, especially for black-box ML techniques (such as
Artificial Neural Networks (ANNs)). However, despite
the successful employment of ML \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "62",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pan:2024:MTR,
author = "Yungang Pan and Rouhollah Mahfouzi and Soheil Samii
and Petru Eles and Zebo Peng",
title = "Multi-Traffic Resource Optimization for Real-Time
Applications with {5G} Configured {Grant} Scheduling",
journal = j-TECS,
volume = "23",
number = "4",
pages = "63:1--63:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3664621",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3664621",
abstract = "The fifth-generation (5G) technology standard in
telecommunications is expected to support
ultra-reliable low latency communication to enable
real-time applications such as industrial automation
and control. 5G configured grant (CG) scheduling
features a \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "63",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hada:2024:DCH,
author = "Rupendra Pratap Singh Hada and Abhishek Srivastava",
title = "Dynamic Cluster Head Selection in {WSN}",
journal = j-TECS,
volume = "23",
number = "4",
pages = "64:1--64:??",
month = jul,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3665867",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:15 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3665867",
abstract = "A Wireless Sensor Network (WSN) comprises an ad-hoc
network of nodes laden with sensors that are used to
monitor a region mostly in the outdoors and often not
easily accessible. Despite exceptions, several
deployments of WSN continue to grapple with the
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "64",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Deantoni:2024:ISI,
author = "Julien Deantoni and Alain Girault and Daniel Grosse",
title = "Introduction to the Special Issue on Specification and
Design Languages ({FDL 2021})",
journal = j-TECS,
volume = "23",
number = "5",
pages = "65:1--65:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3677316",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3677316",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "65",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Grimm:2024:LGM,
author = "Lena Grimm and Steven Smyth and Alexander
Schulz-Rosengarten and Reinhard von Hanxleden and Marc
Pouzet",
title = "From {Lustre} to Graphical Models and {SCCharts}",
journal = j-TECS,
volume = "23",
number = "5",
pages = "66:1--66:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3544973",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3544973",
abstract = "We introduce a systematic approach for automatically
creating a visual diagram, akin to the graphical Safety
Critical Application Development Environment (SCADE)
model, from a Lustre program. This not only saves
tedious manual drawing effort but also \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "66",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Goli:2024:ESI,
author = "Mehran Goli and Rolf Drechsler",
title = "Early {SoCs} Information Flow Policies Validation
Using {SystemC}-Based Virtual Prototypes at the {ESL}",
journal = j-TECS,
volume = "23",
number = "5",
pages = "67:1--67:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3544780",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3544780",
abstract = "Virtual Prototypes (VPs) at the Electronic System
Level (ESL) are being increasingly adopted by the
semiconductor industry and play an important role in
modernizing the System-on-Chips (SoCs) design flow to
raise design productivity and reduce time-to-market
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "67",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Benmaghnia:2024:CGN,
author = "Hanane Benmaghnia and Matthieu Martel and Yassamine
Seladji",
title = "Code Generation for Neural Networks Based on
Fixed-point Arithmetic",
journal = j-TECS,
volume = "23",
number = "5",
pages = "68:1--68:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3563945",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3563945",
abstract = "Over the past few years, neural networks have started
penetrating safety critical systems to make decisions
as, for example, in robots, rockets, and autonomous
driving cars. Neural networks based on floating-point
arithmetic are very time and memory consuming, which
are not compatible with embedded systems known to have
limited resources. They are also very sensitive to the
precision in which they have been trained, so changing
this precision generally degrades the quality of their
answers. To deal with that, we introduce a new
technique to generate a fixed-point code for a trained
neural network. This technique is based on fixed-point
arithmetic with mixed-precision. This arithmetic is
based on integer operations only, which are compatible
with small memory devices. The obtained neural network
has the same behavior as the initial one (based on the
floating-point arithmetic) up to an error threshold
defined by the user. The experimental results show the
efficiency of our tool SyFix in terms of memory saved
and the accuracy of the computations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "68",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hui:2024:SSM,
author = "John Hui and Stephen A. Edwards",
title = "The Sparse Synchronous Model on Real Hardware",
journal = j-TECS,
volume = "23",
number = "5",
pages = "69:1--69:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3572920",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3572920",
abstract = "We present the Sparse Synchronous model (SSM) of
computation, which allows a programmer to specify
software timing more precisely than the traditional
``heartbeat'' of mainstream operating systems or the
synchronous languages. SSM is a mix of semantics
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "69",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Aguado:2024:SSM,
author = "Joaqu{\'\i}n Aguado and Alejandra Duenas",
title = "Synchronised Shared Memory and Model Checking",
journal = j-TECS,
volume = "23",
number = "5",
pages = "70:1--70:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3626188",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3626188",
abstract = "In this article, a formal generic framework for
defining and reasoning about deterministic concurrency
in synchronous systems is implemented in the Spin model
checker. Concretely, the work implements the
clock-synchronised shared memory (csm) theory,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "70",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Thuijsman:2024:SCD,
author = "Sander Thuijsman and Michel Reniers",
title = "Supervisory Control for Dynamic Feature Configuration
in Product Lines",
journal = j-TECS,
volume = "23",
number = "5",
pages = "71:1--71:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3579644",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3579644",
abstract = "In this paper a framework for engineering supervisory
controllers for product lines with dynamic feature
configuration is proposed. The variability in valid
configurations is described by a feature model.
Behavior of system components is achieved using
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "71",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Peres:2024:PTU,
author = "Florent Peres and Mohamed Ghazel",
title = "A Proven Translation from a {UML} State Machine Subset
to Timed Automata",
journal = j-TECS,
volume = "23",
number = "5",
pages = "72:1--72:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3581771",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3581771",
abstract = "Although Unified Modeling Language (UML) state
machines constitute a convenient modeling formalism
that is widely used in many applications, the lack of
formal semantics impedes carrying out automatic
processing, such as formal verification. In this
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "72",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gauthier:2024:HRE,
author = "Lovic Gauthier and Yohei Ishikawa",
title = "{HDLRuby}: a {Ruby} Extension for Hardware Description
and Its Translation to Synthesizable {Verilog HDL}",
journal = j-TECS,
volume = "23",
number = "5",
pages = "73:1--73:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3581757",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3581757",
abstract = "HDLRuby is a new hardware description language defined
as an extension of the Ruby programming language aiming
to improve circuit design productivity. HDLRuby allows
to model digital circuits at the register transfer
level while supporting high-level \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "73",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lumpp:2024:DFB,
author = "Francesco Lumpp and Marco Panato and Nicola Bombieri
and Franco Fummi",
title = "A Design Flow Based on {Docker} and {Kubernetes} for
{ROS}-based Robotic Software Applications",
journal = j-TECS,
volume = "23",
number = "5",
pages = "74:1--74:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3594539",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3594539",
abstract = "Human-centered robotic applications are becoming
pervasive in the context of robotics and smart
manufacturing, and such a pervasiveness is even more
expected with the shift to Industry 5.0. The always
increasing level of autonomy of modern robotic
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "74",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Arasteh:2024:FLT,
author = "Emad M. Arasteh and Rainer D{\"o}mer",
title = "Fast Loosely-Timed Deep Neural Network Models with
Accurate Memory Contention",
journal = j-TECS,
volume = "23",
number = "5",
pages = "75:1--75:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607548",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607548",
abstract = "The emergence of data-intensive applications, such as
Deep Neural Networks (DNN), exacerbates the well-known
memory bottleneck in computer systems and demands early
attention in the design flow. Electronic System-Level
(ESL) design using SystemC \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "75",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gretz:2024:SSM,
author = "Friedrich Gretz and Franz-Josef Grosch and Michael
Mendler and Stephan Scheele",
title = "Synchronized Shared Memory and Black-box Procedural
Abstraction: Toward a Formal Semantics of {Blech}",
journal = j-TECS,
volume = "23",
number = "5",
pages = "76:1--76:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3571585",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3571585",
abstract = "Traditional imperative synchronous programming
languages heavily rely on a strict separation between
data memory and communication signals. Signals can be
shared between computational units but cannot be
overwritten within a synchronous reaction cycle.
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "76",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Lohstroh:2024:DCA,
author = "Marten Lohstroh and Soroush Bateni and Christian
Menard and Alexander Schulz-Rosengarten and Jeronimo
Castrillon and Edward A. Lee",
title = "Deterministic Coordination across Multiple Timelines",
journal = j-TECS,
volume = "23",
number = "5",
pages = "77:1--77:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3615357",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3615357",
abstract = "We discuss a novel approach for constructing
deterministic reactive systems that revolves around a
temporal model that incorporates a multiplicity of
timelines. This model is central to Lingua Franca (LF),
a polyglot coordination language and compiler
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "77",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Narang:2024:TTE,
author = "Gaurav Narang and Chukwufumnanya Ogbogu and Janardhan
Rao Doppa and Partha Pratim Pande",
title = "{TEFLON}: Thermally Efficient Dataflow-aware {$3$D}
{NoC} for Accelerating {CNN} Inferencing on Manycore
{PIM} Architectures",
journal = j-TECS,
volume = "23",
number = "5",
pages = "78:1--78:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3665279",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3665279",
abstract = "Resistive random-access memory (ReRAM)-based
processing-in-memory (PIM) architectures are used
extensively to accelerate inferencing/training with
convolutional neural networks (CNNs). Three-dimensional
(3D) integration is an enabling technology to
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "78",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2024:HSD,
author = "Xingbin Wang and Boyan Zhao and Yulan Su and Sisi
Zhang and Fengkai Yuan and Jun Zhang and Dan Meng and
Rui Hou",
title = "A Hybrid Sparse-dense Defensive {DNN} Accelerator
Architecture against Adversarial Example Attacks",
journal = j-TECS,
volume = "23",
number = "5",
pages = "79:1--79:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3677318",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3677318",
abstract = "Understanding how to defend against adversarial
attacks is crucial for ensuring the safety and
reliability of these systems in real-world
applications. Various adversarial defense methods are
proposed, which aim at improving the robustness of
neural \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "79",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Dixit:2024:PPA,
author = "Akanksha Dixit and Smruti R. Sarangi",
title = "{PredATW}: Predicting the Asynchronous Time Warp
Latency For {VR} Systems",
journal = j-TECS,
volume = "23",
number = "5",
pages = "80:1--80:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3677329",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3677329",
abstract = "With the advent of low-power ultra-fast hardware and
GPUs, virtual reality (VR) has gained a lot of
prominence in the past few years and is being used in
various areas, such as education, entertainment,
scientific visualization, and computer-aided design.
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "80",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2024:DMD,
author = "Chia-Hao Li and Niraj K. Jha",
title = "{DOCTOR}: a Multi-Disease Detection Continual Learning
Framework Based on Wearable Medical Sensors",
journal = j-TECS,
volume = "23",
number = "5",
pages = "81:1--81:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3679050",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3679050",
abstract = "Modern advances in machine learning (ML) and wearable
medical sensors (WMSs) in edge devices have enabled
ML-driven disease detection for smart healthcare.
Conventional ML-driven methods for disease detection
rely on customizing individual models for each
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "81",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Hafezan:2024:TFD,
author = "Mohammad Hassan Hafezan and Ehsan Atoofian",
title = "Transient Fault Detection in Tensor Cores for Modern
{GPUs}",
journal = j-TECS,
volume = "23",
number = "5",
pages = "82:1--82:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3687483",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3687483",
abstract = "Deep neural networks (DNNs) have emerged as an
effective solution for many machine learning
applications. However, the great success comes with the
cost of excessive computation. The Volta graphics
processing unit (GPU) from NVIDIA introduced a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "82",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wang:2024:LBR,
author = "Meng Wang and Yiqin Lu and Haihan Wang and Zhuoxing
Chen and Jiancheng Qin",
title = "Load-balanced Routing Heuristics for Bandwidth
Allocation of {AVB} Flow in {TSN}",
journal = j-TECS,
volume = "23",
number = "5",
pages = "83:1--83:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3687307",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3687307",
abstract = "Time-Sensitive Networking (TSN) is a new technology
developed from Ethernet that guarantees deterministic
transmission of various types of flows, such as
Time-triggered (TT) flows and Audio-video-bridging
(AVB) flows, in the same network. Currently,
Time-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "83",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gomez:2024:ODF,
author = "Clara Gomez and Davron Patkhullaev and Alejandra C.
Hernandez",
title = "{OffloaD}: Detection Failure-based Scheduler for
Offloading Object Detection",
journal = j-TECS,
volume = "23",
number = "5",
pages = "84:1--84:??",
month = sep,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3677321",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:16 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3677321",
abstract = "The current times ask for resource-constrained devices
such as drones, light mobile robots, XR glasses, or
mobile phones to perform object detection efficiently
and in real time. However, when executed on the device,
object detection fails to achieve the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "84",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Shi:2024:ISI,
author = "Liang Shi and Jingtong Shi and Hussam Amrouch and
Kuan-Hsun Chen and Mengying Zhao and Weichen Liu",
title = "Introduction to Special Issue on In\slash Near Memory
and Storage Computing for Embedded Systems",
journal = j-TECS,
volume = "23",
number = "6",
pages = "85:1--85:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3677018",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3677018",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "85",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Li:2024:ATV,
author = "Fenfang Li and Huizhang Luo and Junqi Wang and Yida Li
and Zhuo Tang and Kenli Li",
title = "{AMP}: Total Variation Reduction for Lossless
Compression via Approximate Median-based
Preconditioning",
journal = j-TECS,
volume = "23",
number = "6",
pages = "86:1--86:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3605359",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3605359",
abstract = "With the increasing scale of cloud computing
applications of next-generation embedded systems, a
major challenge that domain scientists are facing is
how to efficiently store and analyze the vast volume of
output data. Compression can reduce the amount of
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "86",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Ye:2024:HEA,
author = "Chongnan Ye and Meng Chen and Qisheng Jiang and
Chundong Wang",
title = "{Hercules}: Enabling Atomic Durability for Persistent
Memory with Transient Persistence Domain",
journal = j-TECS,
volume = "23",
number = "6",
pages = "87:1--87:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3607473",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607473",
abstract = "Persistent memory (pmem) products bring the
persistence domain up to the memory level. Intel
recently introduced the eADR feature that guarantees to
flush data buffered in CPU cache to pmem on a power
outage, thereby making the CPU cache a transient
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "87",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Du:2024:AMC,
author = "Sichun Du and Jun Li and Chen Sun and Pingdan Xiao and
Qinghui Hong and Jiliang Zhang",
title = "Analog In-memory Circuit Design of Polynomial
Multiplication for Lattice Cipher Acceleration
Application",
journal = j-TECS,
volume = "23",
number = "6",
pages = "88:1--88:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3605891",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3605891",
abstract = "As the core operation of lattice cipher, large-scale
polynomial multiplication is the biggest computational
bottleneck in its realization process. How to quickly
calculate polynomial multiplication under resource
constraints has become an urgent problem \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "88",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Gao:2024:SSW,
author = "Xin Gao and Hongyue Wang and Yiyan Chen and Yuhao
Zhang and Zhaoyan Shen and Lei Ju",
title = "Static Scheduling of Weight Programming for {DNN}
Acceleration with Resource Constrained {PIM}",
journal = j-TECS,
volume = "23",
number = "6",
pages = "89:1--89:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3615657",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3615657",
abstract = "Most existing architectural studies on ReRAM-based
processing-in-memory (PIM) DNN accelerators assume that
all weights of the DNN can be mapped to the crossbar at
once. However, these studies are over-idealized. ReRAM
crossbar resources for calculation \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "89",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bhunia:2024:RDS,
author = "Kousik Bhunia and Arighna Deb and Kamalika Datta and
Muhammad Hassan and Saeideh Shirinzadeh and Rolf
Drechsler",
title = "{ReSG}: a Data Structure for Verification of
Majority-based In-memory Computing on {ReRAM}
Crossbars",
journal = j-TECS,
volume = "23",
number = "6",
pages = "90:1--90:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3615358",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3615358",
abstract = "Recent advancements in the fabrication of Resistive
Random Access Memory (ReRAM) devices have led to the
development of large-scale crossbar structures.
In-memory computing architectures relying on ReRAM
crossbars aim to mitigate the processor-memory
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "90",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Liang:2024:REE,
author = "Dehua Liang and Hiromitsu Awano and Noriyuki Miura and
Jun Shiomi",
title = "A Robust and Energy Efficient Hyperdimensional
Computing System for Voltage-scaled Circuits",
journal = j-TECS,
volume = "23",
number = "6",
pages = "91:1--91:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3620671",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3620671",
abstract = "Voltage scaling is one of the most promising
approaches for energy efficiency improvement but also
brings challenges to fully guaranteeing stable
operation in modern VLSI. To tackle such issues, we
further extend the DependableHD to the second version
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "91",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Byun:2024:AMB,
author = "Hongsu Byun and Safdar Jamil and Jungwook Han and
Sungyong Park and Myungcheol Lee and Changsoo Kim and
Beongjun Choi and Youngjae Kim",
title = "An Analytical Model-based Capacity Planning Approach
for Building {CSD}-based Storage Systems",
journal = j-TECS,
volume = "23",
number = "6",
pages = "92:1--92:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3623677",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3623677",
abstract = "The data movement in large-scale computing facilities
(from compute nodes to data nodes) is categorized as
one of the major contributors to high cost and energy
utilization. To tackle it, in-storage processing (ISP)
within storage devices, such as Solid-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "92",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Sun:2024:ACA,
author = "Hui Sun and Bendong Lou and Chao Zhao and Deyan Kong
and Chaowei Zhang and Jianzhong Huang and Yinliang Yue
and Xiao Qin",
title = "Asynchronous Compaction Acceleration Scheme for
Near-data Processing-enabled {LSM}-tree-based {KV}
Stores",
journal = j-TECS,
volume = "23",
number = "6",
pages = "93:1--93:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3626097",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3626097",
abstract = "LSM-tree-based key-value stores (KV stores) convert
random-write requests to sequence-write ones to achieve
high I/O performance. Meanwhile, compaction operations
in KV stores update SSTables in forms of reorganizing
low-level data components to high-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "93",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Bera:2024:SSP,
author = "Pavia Bera and Stephen Cahoon and Sanjukta Bhanja and
Alex Jones",
title = "{SPIMulator}: a Spintronic Processing-in-memory
Simulator for Racetracks",
journal = j-TECS,
volume = "23",
number = "6",
pages = "94:1--94:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3645112",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3645112",
abstract = "In-memory processing is becoming a popular method to
alleviate the memory bottleneck of the Von Neumann
computing model. With the goal of improving both
latency and energy cost associated with such in-memory
processing, emerging non-volatile memory \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "94",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Zhou:2024:RRC,
author = "Kunyu Zhou and Keni Qiu",
title = "{REC}: {REtime} Convolutional Layers to Fully Exploit
Harvested Energy for {ReRAM}-based {CNN} Accelerators",
journal = j-TECS,
volume = "23",
number = "6",
pages = "95:1--95:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3652593",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3652593",
abstract = "As the Internet of Things (IoTs) increasingly combines
AI technology, it is a trend to deploy neural network
algorithms at edges and make IoT devices more
intelligent than ever. Moreover, energy-harvesting
technology-based IoT devices have shown the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "95",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Pandey:2024:NTM,
author = "Shailja Pandey and Preeti Ranjan Panda",
title = "{NeuroTAP}: Thermal and Memory Access Pattern-Aware
Data Mapping on {$3$D} {DRAM} for Maximizing {DNN}
Performance",
journal = j-TECS,
volume = "23",
number = "6",
pages = "96:1--96:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3677178",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3677178",
abstract = "Deep neural networks (DNNs) have been widely adopted,
owing to break-through performance and high accuracy.
DNNs exhibit varying memory behavior involving specific
and recognizable memory access patterns and access
intensity, depending on the selected \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "96",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Wu:2024:HPP,
author = "Zhuanhao Wu and Anirudh Kaushik and Hiren Patel",
title = "High Performance and Predictable Shared Last-level
Cache for Safety-Critical Systems",
journal = j-TECS,
volume = "23",
number = "6",
pages = "97:1--97:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3687308",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3687308",
abstract = "We propose ZeroCost-LLC (ZCLLC), a novel shared
inclusive last-level cache (LLC) design for timing
predictable multi-core platforms that offers lower
worst-case latency (WCL) when compared with a
traditional shared inclusive LLC design. ZCLLC achieves
low \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "97",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Xu:2024:ODI,
author = "Runqing Xu and Debiao He and Min Luo and Cong Peng and
Xiangyong Zeng",
title = "Optimizing {Dilithium} Implementation with
{AVX2\slash-512}",
journal = j-TECS,
volume = "23",
number = "6",
pages = "98:1--98:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3687309",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3687309",
abstract = "Dilithium is a signature scheme that is currently
being standardized to the Module-Lattice-Based Digital
Signature Standard by NIST. It is believed to be secure
even against attacks from large-scale quantum computers
based on lattice problems. The \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "98",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kashikar:2024:CWA,
author = "Prachi Kashikar and Olivier Sentieys and Sharad
Sinha",
title = "Combining Weight Approximation, Sharing and Retraining
for Neural Network Model Compression",
journal = j-TECS,
volume = "23",
number = "6",
pages = "99:1--99:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3687466",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3687466",
abstract = "Neural network model compression is very important to
achieve model deployment based on the memory and
storage available in different computing systems.
Generally, the continuous drive for higher accuracy in
these models increases their size and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "99",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Weerasena:2024:RCA,
author = "Hansika Weerasena and Prabhat Mishra",
title = "Revealing {CNN} Architectures via Side-Channel
Analysis in Dataflow-based Inference Accelerators",
journal = j-TECS,
volume = "23",
number = "6",
pages = "100:1--100:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3688001",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3688001",
abstract = "Convolutional Neural Networks (CNNs) are widely used
in various domains, including image recognition,
medical diagnosis and autonomous driving. Recent
advances in dataflow-based CNN accelerators have
enabled CNN inference in resource-constrained edge
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "100",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Mamish:2024:NSP,
author = "John Mamish and Rawan Alharbi and Sougata Sen and
Shashank Holla and Panchami Kamath and Yaman Sangar and
Nabil Alshurafa and Josiah Hester",
title = "{NIR-sighted}: a Programmable Streaming Architecture
for Low-Energy Human-Centric Vision Applications",
journal = j-TECS,
volume = "23",
number = "6",
pages = "101:1--101:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3672076",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3672076",
abstract = "Human studies often rely on wearable lifelogging
cameras that capture videos of individuals and their
surroundings to aid in visual confirmation or
recollection of daily activities like eating, drinking,
and smoking. However, this may include private or
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "101",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Jiang:2024:EMB,
author = "Zijing Jiang and Qun Ding and An Wang",
title = "Efficient Multi-Byte Power Analysis Architecture
Focusing on Bitwise Linear Leakage",
journal = j-TECS,
volume = "23",
number = "6",
pages = "102:1--102:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3687484",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3687484",
abstract = "As the most commonly used side-channel analysis
method, Correlation Power Analysis (CPA) usually uses
the divide-and-conquer strategy to guess the
single-byte key in the scenario of block cipher
parallel implementation. However, this method cannot
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "102",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Basak:2024:ELM,
author = "Barnali Basak and Pallab Dasgupta and Arpan Pal",
title = "Efficient Low-Memory Implementation of Sparse {CNNs}
Using Encoded Partitioned Hybrid Sparse Format",
journal = j-TECS,
volume = "23",
number = "6",
pages = "103:1--103:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3687239",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3687239",
abstract = "Certain data compression techniques like pruning leads
to unstructured sparse Convolution Neural Network (CNN)
models without directly leveraging sparsity in
optimizing both memory consumption and inference
latency of a model having low to medium \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "103",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}
@Article{Kutukcu:2024:SAI,
author = "Basar Kutukcu and Sabur Baidya and Sujit Dey",
title = "{SLEXNet}: Adaptive Inference Using Slimmable Early
Exit Neural Networks",
journal = j-TECS,
volume = "23",
number = "6",
pages = "104:1--104:??",
month = nov,
year = "2024",
CODEN = "????",
DOI = "https://doi.org/10.1145/3689632",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Wed Sep 25 11:16:17 MDT 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
URL = "https://dl.acm.org/doi/10.1145/3689632",
abstract = "Deep learning is a proven method in many applications.
However, it requires high computation resources and
usually has a constant architecture. Mobile systems are
good candidates to benefit from deep learning
applications since they are closely integrated
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Embed. Comput. Syst.",
articleno = "104",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "https://dl.acm.org/loi/tecs",
}